1
The following changes since commit c52d69e7dbaaed0ffdef8125e79218672c30161d:
1
Note that I have refreshed the expiry of my public key.
2
2
and pushed to keys.openpgp.org.
3
Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20211027' into staging (2021-10-27 11:45:18 -0700)
3
4
5
r~
6
7
8
The following changes since commit 4d5d933bbc7cc52f6cc6b9021f91fa06266222d5:
9
10
Merge tag 'pull-xenfv-20250116' of git://git.infradead.org/users/dwmw2/qemu into staging (2025-01-16 09:03:43 -0500)
4
11
5
are available in the Git repository at:
12
are available in the Git repository at:
6
13
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20211027
14
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20250117
8
15
9
for you to fetch changes up to 820c025f0dcacf2f3c12735b1f162893fbfa7bc6:
16
for you to fetch changes up to db1649823d4f27b924a5aa5f9e0111457accb798:
10
17
11
tcg/optimize: Propagate sign info for shifting (2021-10-27 17:11:23 -0700)
18
softfloat: Constify helpers returning float_status field (2025-01-17 08:29:25 -0800)
12
19
13
----------------------------------------------------------------
20
----------------------------------------------------------------
14
Improvements to qemu/int128
21
tcg:
15
Fixes for 128/64 division.
22
- Add TCGOP_TYPE, TCGOP_FLAGS.
16
Cleanup tcg/optimize.c
23
- Pass type and flags to tcg_op_supported, tcg_target_op_def.
17
Optimize redundant sign extensions
24
- Split out tcg-target-has.h and unexport from tcg.h.
25
- Reorg constraint processing; constify TCGOpDef.
26
- Make extract, sextract, deposit opcodes mandatory.
27
- Merge ext{8,16,32}{s,u} opcodes into {s}extract.
28
tcg/mips: Expand bswap unconditionally
29
tcg/riscv: Use SRAIW, SRLIW for {s}extract_i64
30
tcg/riscv: Use BEXTI for single-bit extractions
31
tcg/sparc64: Use SRA, SRL for {s}extract_i64
32
33
disas/riscv: Guard dec->cfg dereference for host disassemble
34
util/cpuinfo-riscv: Detect Zbs
35
accel/tcg: Call tcg_tb_insert() for one-insn TBs
36
linux-user: Add missing /proc/cpuinfo fields for sparc
18
37
19
----------------------------------------------------------------
38
----------------------------------------------------------------
20
Frédéric Pétrot (1):
39
Helge Deller (1):
21
qemu/int128: Add int128_{not,xor}
40
linux-user: Add missing /proc/cpuinfo fields for sparc
22
41
23
Luis Pires (4):
42
Ilya Leoshkevich (2):
24
host-utils: move checks out of divu128/divs128
43
tcg: Document tb_lookup() and tcg_tb_lookup()
25
host-utils: move udiv_qrnnd() to host-utils
44
accel/tcg: Call tcg_tb_insert() for one-insn TBs
26
host-utils: add 128-bit quotient support to divu128/divs128
45
27
host-utils: add unit tests for divu128/divs128
46
LIU Zhiwei (1):
28
47
disas/riscv: Guard dec->cfg dereference for host disassemble
29
Richard Henderson (51):
48
30
tcg/optimize: Rename "mask" to "z_mask"
49
Philippe Mathieu-Daudé (1):
31
tcg/optimize: Split out OptContext
50
softfloat: Constify helpers returning float_status field
32
tcg/optimize: Remove do_default label
51
33
tcg/optimize: Change tcg_opt_gen_{mov,movi} interface
52
Richard Henderson (63):
34
tcg/optimize: Move prev_mb into OptContext
53
tcg: Move call abi parameters from tcg-target.h to tcg-target.c.inc
35
tcg/optimize: Split out init_arguments
54
tcg: Replace TCGOP_VECL with TCGOP_TYPE
36
tcg/optimize: Split out copy_propagate
55
tcg: Move tcg_op_insert_{after,before} decls to tcg-internal.h
37
tcg/optimize: Split out fold_call
56
tcg: Copy TCGOP_TYPE in tcg_op_insert_{after,before}
38
tcg/optimize: Drop nb_oargs, nb_iargs locals
57
tcg: Add TCGOP_FLAGS
39
tcg/optimize: Change fail return for do_constant_folding_cond*
58
tcg: Add type and flags arguments to tcg_op_supported
40
tcg/optimize: Return true from tcg_opt_gen_{mov,movi}
59
target/arm: Do not test TCG_TARGET_HAS_bitsel_vec
41
tcg/optimize: Split out finish_folding
60
target/arm: Use tcg_op_supported
42
tcg/optimize: Use a boolean to avoid a mass of continues
61
target/tricore: Use tcg_op_supported
43
tcg/optimize: Split out fold_mb, fold_qemu_{ld,st}
62
tcg: Add tcg_op_deposit_valid
44
tcg/optimize: Split out fold_const{1,2}
63
target/i386: Remove TCG_TARGET_extract_tl_valid
45
tcg/optimize: Split out fold_setcond2
64
target/i386: Use tcg_op_deposit_valid
46
tcg/optimize: Split out fold_brcond2
65
target/i386: Use tcg_op_supported
47
tcg/optimize: Split out fold_brcond
66
tcg: Remove TCG_TARGET_NEED_LDST_LABELS and TCG_TARGET_NEED_POOL_LABELS
48
tcg/optimize: Split out fold_setcond
67
tcg: Rename tcg-target.opc.h to tcg-target-opc.h.inc
49
tcg/optimize: Split out fold_mulu2_i32
68
tcg/tci: Move TCI specific opcodes to tcg-target-opc.h.inc
50
tcg/optimize: Split out fold_addsub2_i32
69
tcg: Move fallback tcg_can_emit_vec_op out of line
51
tcg/optimize: Split out fold_movcond
70
tcg/ppc: Remove TCGPowerISA enum
52
tcg/optimize: Split out fold_extract2
71
tcg: Extract default TCG_TARGET_HAS_foo definitions to 'tcg-has.h'
53
tcg/optimize: Split out fold_extract, fold_sextract
72
tcg/aarch64: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
54
tcg/optimize: Split out fold_deposit
73
tcg/arm: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
55
tcg/optimize: Split out fold_count_zeros
74
tcg/i386: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
56
tcg/optimize: Split out fold_bswap
75
tcg/loongarch64: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
57
tcg/optimize: Split out fold_dup, fold_dup2
76
tcg/mips: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
58
tcg/optimize: Split out fold_mov
77
tcg/ppc: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
59
tcg/optimize: Split out fold_xx_to_i
78
tcg/riscv: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
60
tcg/optimize: Split out fold_xx_to_x
79
tcg/s390x: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
61
tcg/optimize: Split out fold_xi_to_i
80
tcg/sparc64: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
62
tcg/optimize: Add type to OptContext
81
tcg/tci: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
63
tcg/optimize: Split out fold_to_not
82
tcg: Include 'tcg-target-has.h' once in 'tcg-has.h'
64
tcg/optimize: Split out fold_sub_to_neg
83
tcg: Only include 'tcg-has.h' when necessary
65
tcg/optimize: Split out fold_xi_to_x
84
tcg: Split out tcg-target-mo.h
66
tcg/optimize: Split out fold_ix_to_i
85
tcg: Use C_NotImplemented in tcg_target_op_def
67
tcg/optimize: Split out fold_masks
86
tcg: Change have_vec to has_type in tcg_op_supported
68
tcg/optimize: Expand fold_mulu2_i32 to all 4-arg multiplies
87
tcg: Reorg process_op_defs
69
tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops
88
tcg: Remove args_ct from TCGOpDef
70
tcg/optimize: Sink commutative operand swapping into fold functions
89
tcg: Constify tcg_op_defs
71
tcg/optimize: Stop forcing z_mask to "garbage" for 32-bit values
90
tcg: Validate op supported in opcode_args_ct
72
tcg/optimize: Use fold_xx_to_i for orc
91
tcg: Add TCG_OPF_NOT_PRESENT to opcodes without inputs or outputs
73
tcg/optimize: Use fold_xi_to_x for mul
92
tcg: Pass type and flags to tcg_target_op_def
74
tcg/optimize: Use fold_xi_to_x for div
93
tcg: Add TCGType argument to tcg_out_op
75
tcg/optimize: Use fold_xx_to_i for rem
94
tcg: Remove TCG_OPF_64BIT
76
tcg/optimize: Optimize sign extensions
95
tcg: Drop implementation checks from tcg-opc.h
77
tcg/optimize: Propagate sign info for logical operations
96
tcg: Replace IMPLVEC with TCG_OPF_VECTOR
78
tcg/optimize: Propagate sign info for setcond
97
tcg/mips: Expand bswap unconditionally
79
tcg/optimize: Propagate sign info for bit counting
98
tcg/i386: Handle all 8-bit extensions for i686
80
tcg/optimize: Propagate sign info for shifting
99
tcg/i386: Fold the ext{8,16,32}[us] cases into {s}extract
81
100
tcg/aarch64: Provide TCG_TARGET_{s}extract_valid
82
include/fpu/softfloat-macros.h | 82 --
101
tcg/aarch64: Expand extract with offset 0 with andi
83
include/hw/clock.h | 5 +-
102
tcg/arm: Add full [US]XT[BH] into {s}extract
84
include/qemu/host-utils.h | 121 +-
103
tcg/loongarch64: Fold the ext{8,16,32}[us] cases into {s}extract
85
include/qemu/int128.h | 20 +
104
tcg/mips: Fold the ext{8,16,32}[us] cases into {s}extract
86
target/ppc/int_helper.c | 23 +-
105
tcg/ppc: Fold the ext{8,16,32}[us] cases into {s}extract
87
tcg/optimize.c | 2644 ++++++++++++++++++++++++----------------
106
tcg/riscv64: Fold the ext{8,16,32}[us] cases into {s}extract
88
tests/unit/test-div128.c | 197 +++
107
tcg/riscv: Use SRAIW, SRLIW for {s}extract_i64
89
util/host-utils.c | 147 ++-
108
tcg/s390x: Fold the ext{8,16,32}[us] cases into {s}extract
90
tests/unit/meson.build | 1 +
109
tcg/sparc64: Use SRA, SRL for {s}extract_i64
91
9 files changed, 2053 insertions(+), 1187 deletions(-)
110
tcg/tci: Provide TCG_TARGET_{s}extract_valid
92
create mode 100644 tests/unit/test-div128.c
111
tcg/tci: Remove assertions for deposit and extract
93
112
tcg: Remove TCG_TARGET_HAS_{s}extract_{i32,i64}
113
tcg: Remove TCG_TARGET_HAS_deposit_{i32,i64}
114
util/cpuinfo-riscv: Detect Zbs
115
tcg/riscv: Use BEXTI for single-bit extractions
116
117
accel/tcg/internal-target.h | 1 +
118
host/include/riscv/host/cpuinfo.h | 5 +-
119
include/fpu/softfloat-helpers.h | 25 +-
120
include/tcg/tcg-opc.h | 355 +++++-------
121
include/tcg/tcg.h | 187 ++----
122
linux-user/sparc/target_proc.h | 20 +-
123
tcg/aarch64/tcg-target-has.h | 117 ++++
124
tcg/aarch64/tcg-target-mo.h | 12 +
125
tcg/aarch64/tcg-target.h | 126 ----
126
tcg/arm/tcg-target-has.h | 100 ++++
127
tcg/arm/tcg-target-mo.h | 13 +
128
tcg/arm/tcg-target.h | 86 ---
129
tcg/i386/tcg-target-has.h | 169 ++++++
130
tcg/i386/tcg-target-mo.h | 19 +
131
tcg/i386/tcg-target.h | 162 ------
132
tcg/loongarch64/tcg-target-has.h | 119 ++++
133
tcg/loongarch64/tcg-target-mo.h | 12 +
134
tcg/loongarch64/tcg-target.h | 115 ----
135
tcg/mips/tcg-target-has.h | 135 +++++
136
tcg/mips/tcg-target-mo.h | 13 +
137
tcg/mips/tcg-target.h | 130 -----
138
tcg/ppc/tcg-target-has.h | 131 +++++
139
tcg/ppc/tcg-target-mo.h | 12 +
140
tcg/ppc/tcg-target.h | 126 ----
141
tcg/riscv/tcg-target-has.h | 135 +++++
142
tcg/riscv/tcg-target-mo.h | 12 +
143
tcg/riscv/tcg-target.h | 116 ----
144
tcg/s390x/tcg-target-has.h | 137 +++++
145
tcg/s390x/tcg-target-mo.h | 12 +
146
tcg/s390x/tcg-target.h | 126 ----
147
tcg/sparc64/tcg-target-has.h | 87 +++
148
tcg/sparc64/tcg-target-mo.h | 12 +
149
tcg/sparc64/tcg-target.h | 91 ---
150
tcg/tcg-has.h | 101 ++++
151
tcg/tcg-internal.h | 18 +-
152
tcg/tci/tcg-target-has.h | 81 +++
153
tcg/tci/tcg-target-mo.h | 17 +
154
tcg/tci/tcg-target.h | 94 ---
155
accel/tcg/cpu-exec.c | 15 +-
156
accel/tcg/translate-all.c | 29 +-
157
disas/riscv.c | 23 +-
158
target/arm/tcg/translate-a64.c | 10 +-
159
target/arm/tcg/translate-sve.c | 22 +-
160
target/arm/tcg/translate.c | 2 +-
161
target/tricore/translate.c | 4 +-
162
tcg/optimize.c | 27 +-
163
tcg/tcg-common.c | 5 +-
164
tcg/tcg-op-gvec.c | 1 +
165
tcg/tcg-op-ldst.c | 29 +-
166
tcg/tcg-op-vec.c | 9 +-
167
tcg/tcg-op.c | 149 ++---
168
tcg/tcg.c | 643 ++++++++++++++-------
169
tcg/tci.c | 13 +-
170
util/cpuinfo-riscv.c | 18 +-
171
docs/devel/tcg-ops.rst | 15 +-
172
target/i386/tcg/emit.c.inc | 14 +-
173
.../{tcg-target.opc.h => tcg-target-opc.h.inc} | 4 +-
174
tcg/aarch64/tcg-target.c.inc | 33 +-
175
tcg/arm/{tcg-target.opc.h => tcg-target-opc.h.inc} | 6 +-
176
tcg/arm/tcg-target.c.inc | 71 ++-
177
.../{tcg-target.opc.h => tcg-target-opc.h.inc} | 22 +-
178
tcg/i386/tcg-target.c.inc | 121 +++-
179
.../{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
180
tcg/loongarch64/tcg-target.c.inc | 59 +-
181
tcg/mips/tcg-target-opc.h.inc | 1 +
182
tcg/mips/tcg-target.c.inc | 55 +-
183
tcg/ppc/{tcg-target.opc.h => tcg-target-opc.h.inc} | 12 +-
184
tcg/ppc/tcg-target.c.inc | 39 +-
185
.../{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
186
tcg/riscv/tcg-target.c.inc | 66 ++-
187
.../{tcg-target.opc.h => tcg-target-opc.h.inc} | 6 +-
188
tcg/s390x/tcg-target.c.inc | 59 +-
189
tcg/sparc64/tcg-target-opc.h.inc | 1 +
190
tcg/sparc64/tcg-target.c.inc | 29 +-
191
tcg/tcg-ldst.c.inc | 65 ---
192
tcg/tcg-pool.c.inc | 162 ------
193
tcg/tci/tcg-target-opc.h.inc | 4 +
194
tcg/tci/tcg-target.c.inc | 53 +-
195
78 files changed, 2856 insertions(+), 2269 deletions(-)
196
create mode 100644 tcg/aarch64/tcg-target-has.h
197
create mode 100644 tcg/aarch64/tcg-target-mo.h
198
create mode 100644 tcg/arm/tcg-target-has.h
199
create mode 100644 tcg/arm/tcg-target-mo.h
200
create mode 100644 tcg/i386/tcg-target-has.h
201
create mode 100644 tcg/i386/tcg-target-mo.h
202
create mode 100644 tcg/loongarch64/tcg-target-has.h
203
create mode 100644 tcg/loongarch64/tcg-target-mo.h
204
create mode 100644 tcg/mips/tcg-target-has.h
205
create mode 100644 tcg/mips/tcg-target-mo.h
206
create mode 100644 tcg/ppc/tcg-target-has.h
207
create mode 100644 tcg/ppc/tcg-target-mo.h
208
create mode 100644 tcg/riscv/tcg-target-has.h
209
create mode 100644 tcg/riscv/tcg-target-mo.h
210
create mode 100644 tcg/s390x/tcg-target-has.h
211
create mode 100644 tcg/s390x/tcg-target-mo.h
212
create mode 100644 tcg/sparc64/tcg-target-has.h
213
create mode 100644 tcg/sparc64/tcg-target-mo.h
214
create mode 100644 tcg/tcg-has.h
215
create mode 100644 tcg/tci/tcg-target-has.h
216
create mode 100644 tcg/tci/tcg-target-mo.h
217
rename tcg/aarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} (82%)
218
rename tcg/arm/{tcg-target.opc.h => tcg-target-opc.h.inc} (75%)
219
rename tcg/i386/{tcg-target.opc.h => tcg-target-opc.h.inc} (72%)
220
rename tcg/loongarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
221
create mode 100644 tcg/mips/tcg-target-opc.h.inc
222
rename tcg/ppc/{tcg-target.opc.h => tcg-target-opc.h.inc} (83%)
223
rename tcg/riscv/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
224
rename tcg/s390x/{tcg-target.opc.h => tcg-target-opc.h.inc} (75%)
225
create mode 100644 tcg/sparc64/tcg-target-opc.h.inc
226
delete mode 100644 tcg/tcg-ldst.c.inc
227
delete mode 100644 tcg/tcg-pool.c.inc
228
create mode 100644 tcg/tci/tcg-target-opc.h.inc
229
diff view generated by jsdifflib
New patch
1
From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
1
2
3
For riscv host, it will set dec->cfg to zero. Thus we shuld guard
4
the dec->cfg deference for riscv host disassemble.
5
6
And in general, we should only use dec->cfg for target in three cases:
7
8
1) For not incompatible encodings, such as zcmp/zcmt/zfinx.
9
2) For maybe-ops encodings, they are better to be disassembled to
10
the "real" extensions, such as zicfiss. The guard of dec->zimop
11
and dec->zcmop is for comment and avoid check for every extension
12
that encoded in maybe-ops area.
13
3) For custom encodings, we have to use dec->cfg to disassemble
14
custom encodings using the same encoding area.
15
16
Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
17
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
18
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
19
Message-ID: <20241206032411.52528-1-zhiwei_liu@linux.alibaba.com>
20
---
21
disas/riscv.c | 23 ++++++++++++-----------
22
1 file changed, 12 insertions(+), 11 deletions(-)
23
24
diff --git a/disas/riscv.c b/disas/riscv.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/disas/riscv.c
27
+++ b/disas/riscv.c
28
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
29
break;
30
case 2: op = rv_op_c_li; break;
31
case 3:
32
- if (dec->cfg->ext_zcmop) {
33
+ if (dec->cfg && dec->cfg->ext_zcmop) {
34
if ((((inst >> 2) & 0b111111) == 0b100000) &&
35
(((inst >> 11) & 0b11) == 0b0)) {
36
unsigned int cmop_code = 0;
37
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
38
op = rv_op_c_sqsp;
39
} else {
40
op = rv_op_c_fsdsp;
41
- if (dec->cfg->ext_zcmp && ((inst >> 12) & 0b01)) {
42
+ if (dec->cfg && dec->cfg->ext_zcmp && ((inst >> 12) & 0b01)) {
43
switch ((inst >> 8) & 0b01111) {
44
case 8:
45
if (((inst >> 4) & 0b01111) >= 4) {
46
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
47
} else {
48
switch ((inst >> 10) & 0b011) {
49
case 0:
50
- if (!dec->cfg->ext_zcmt) {
51
+ if (dec->cfg && !dec->cfg->ext_zcmt) {
52
break;
53
}
54
if (((inst >> 2) & 0xFF) >= 32) {
55
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
56
}
57
break;
58
case 3:
59
- if (!dec->cfg->ext_zcmp) {
60
+ if (dec->cfg && !dec->cfg->ext_zcmp) {
61
break;
62
}
63
switch ((inst >> 5) & 0b011) {
64
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
65
break;
66
case 5:
67
op = rv_op_auipc;
68
- if (dec->cfg->ext_zicfilp &&
69
+ if (dec->cfg && dec->cfg->ext_zicfilp &&
70
(((inst >> 7) & 0b11111) == 0b00000)) {
71
op = rv_op_lpad;
72
}
73
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
74
case 2: op = rv_op_csrrs; break;
75
case 3: op = rv_op_csrrc; break;
76
case 4:
77
- if (dec->cfg->ext_zimop) {
78
+ if (dec->cfg && dec->cfg->ext_zimop) {
79
int imm_mop5, imm_mop3, reg_num;
80
if ((extract32(inst, 22, 10) & 0b1011001111)
81
== 0b1000000111) {
82
@@ -XXX,XX +XXX,XX @@ static GString *format_inst(size_t tab, rv_decode *dec)
83
g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
84
break;
85
case '3':
86
- if (dec->cfg->ext_zfinx) {
87
+ if (dec->cfg && dec->cfg->ext_zfinx) {
88
g_string_append(buf, rv_ireg_name_sym[dec->rd]);
89
} else {
90
g_string_append(buf, rv_freg_name_sym[dec->rd]);
91
}
92
break;
93
case '4':
94
- if (dec->cfg->ext_zfinx) {
95
+ if (dec->cfg && dec->cfg->ext_zfinx) {
96
g_string_append(buf, rv_ireg_name_sym[dec->rs1]);
97
} else {
98
g_string_append(buf, rv_freg_name_sym[dec->rs1]);
99
}
100
break;
101
case '5':
102
- if (dec->cfg->ext_zfinx) {
103
+ if (dec->cfg && dec->cfg->ext_zfinx) {
104
g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
105
} else {
106
g_string_append(buf, rv_freg_name_sym[dec->rs2]);
107
}
108
break;
109
case '6':
110
- if (dec->cfg->ext_zfinx) {
111
+ if (dec->cfg && dec->cfg->ext_zfinx) {
112
g_string_append(buf, rv_ireg_name_sym[dec->rs3]);
113
} else {
114
g_string_append(buf, rv_freg_name_sym[dec->rs3]);
115
@@ -XXX,XX +XXX,XX @@ static GString *disasm_inst(rv_isa isa, uint64_t pc, rv_inst inst,
116
const rv_opcode_data *opcode_data = decoders[i].opcode_data;
117
void (*decode_func)(rv_decode *, rv_isa) = decoders[i].decode_func;
118
119
- if (guard_func(cfg)) {
120
+ /* always_true_p don't dereference cfg */
121
+ if (((i == 0) || cfg) && guard_func(cfg)) {
122
dec.opcode_data = opcode_data;
123
decode_func(&dec, isa);
124
if (dec.op != rv_op_illegal)
125
--
126
2.43.0
diff view generated by jsdifflib
1
This is the final entry in the main switch that was in a
1
These defines are not required outside of tcg/tcg.c,
2
different form. After this, we have the option to convert
2
which includes tcg-target.c.inc before use.
3
the switch into a function dispatch table.
3
Reduces the exported symbol set of tcg-target.h.
4
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 27 ++++++++++++++-------------
8
tcg/aarch64/tcg-target.h | 13 -------------
10
1 file changed, 14 insertions(+), 13 deletions(-)
9
tcg/arm/tcg-target.h | 8 --------
10
tcg/i386/tcg-target.h | 20 --------------------
11
tcg/loongarch64/tcg-target.h | 9 ---------
12
tcg/mips/tcg-target.h | 14 --------------
13
tcg/riscv/tcg-target.h | 9 ---------
14
tcg/s390x/tcg-target.h | 8 --------
15
tcg/sparc64/tcg-target.h | 11 -----------
16
tcg/tci/tcg-target.h | 14 --------------
17
tcg/aarch64/tcg-target.c.inc | 13 +++++++++++++
18
tcg/arm/tcg-target.c.inc | 8 ++++++++
19
tcg/i386/tcg-target.c.inc | 20 ++++++++++++++++++++
20
tcg/loongarch64/tcg-target.c.inc | 9 +++++++++
21
tcg/mips/tcg-target.c.inc | 14 ++++++++++++++
22
tcg/riscv/tcg-target.c.inc | 9 +++++++++
23
tcg/s390x/tcg-target.c.inc | 8 ++++++++
24
tcg/sparc64/tcg-target.c.inc | 10 ++++++++++
25
tcg/tci/tcg-target.c.inc | 14 ++++++++++++++
26
18 files changed, 105 insertions(+), 106 deletions(-)
11
27
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
28
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
30
--- a/tcg/aarch64/tcg-target.h
15
+++ b/tcg/optimize.c
31
+++ b/tcg/aarch64/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
32
@@ -XXX,XX +XXX,XX @@ typedef enum {
17
return true;
33
18
}
34
#define TCG_TARGET_NB_REGS 64
19
35
20
+static bool fold_mov(OptContext *ctx, TCGOp *op)
36
-/* used for function call generation */
21
+{
37
-#define TCG_REG_CALL_STACK TCG_REG_SP
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
38
-#define TCG_TARGET_STACK_ALIGN 16
23
+}
39
-#define TCG_TARGET_CALL_STACK_OFFSET 0
24
+
40
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
25
static bool fold_movcond(OptContext *ctx, TCGOp *op)
41
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
42
-#ifdef CONFIG_DARWIN
43
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
44
-#else
45
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
46
-#endif
47
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
48
-
49
#define have_lse (cpuinfo & CPUINFO_LSE)
50
#define have_lse2 (cpuinfo & CPUINFO_LSE2)
51
52
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tcg/arm/tcg-target.h
55
+++ b/tcg/arm/tcg-target.h
56
@@ -XXX,XX +XXX,XX @@ extern bool use_idiv_instructions;
57
extern bool use_neon_instructions;
58
#endif
59
60
-/* used for function call generation */
61
-#define TCG_TARGET_STACK_ALIGN        8
62
-#define TCG_TARGET_CALL_STACK_OFFSET    0
63
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
64
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
65
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
66
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
67
-
68
/* optional instructions */
69
#define TCG_TARGET_HAS_ext8s_i32 1
70
#define TCG_TARGET_HAS_ext16s_i32 1
71
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
72
index XXXXXXX..XXXXXXX 100644
73
--- a/tcg/i386/tcg-target.h
74
+++ b/tcg/i386/tcg-target.h
75
@@ -XXX,XX +XXX,XX @@ typedef enum {
76
TCG_REG_CALL_STACK = TCG_REG_ESP
77
} TCGReg;
78
79
-/* used for function call generation */
80
-#define TCG_TARGET_STACK_ALIGN 16
81
-#if defined(_WIN64)
82
-#define TCG_TARGET_CALL_STACK_OFFSET 32
83
-#else
84
-#define TCG_TARGET_CALL_STACK_OFFSET 0
85
-#endif
86
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
87
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
88
-#if defined(_WIN64)
89
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
90
-# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_VEC
91
-#elif TCG_TARGET_REG_BITS == 64
92
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
93
-# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
94
-#else
95
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
96
-# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
97
-#endif
98
-
99
#define have_bmi1 (cpuinfo & CPUINFO_BMI1)
100
#define have_popcnt (cpuinfo & CPUINFO_POPCNT)
101
#define have_avx1 (cpuinfo & CPUINFO_AVX1)
102
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
103
index XXXXXXX..XXXXXXX 100644
104
--- a/tcg/loongarch64/tcg-target.h
105
+++ b/tcg/loongarch64/tcg-target.h
106
@@ -XXX,XX +XXX,XX @@ typedef enum {
107
TCG_VEC_TMP0 = TCG_REG_V23,
108
} TCGReg;
109
110
-/* used for function call generation */
111
-#define TCG_REG_CALL_STACK TCG_REG_SP
112
-#define TCG_TARGET_STACK_ALIGN 16
113
-#define TCG_TARGET_CALL_STACK_OFFSET 0
114
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
115
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
116
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
117
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
118
-
119
/* optional instructions */
120
#define TCG_TARGET_HAS_negsetcond_i32 0
121
#define TCG_TARGET_HAS_div_i32 1
122
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
123
index XXXXXXX..XXXXXXX 100644
124
--- a/tcg/mips/tcg-target.h
125
+++ b/tcg/mips/tcg-target.h
126
@@ -XXX,XX +XXX,XX @@ typedef enum {
127
TCG_AREG0 = TCG_REG_S8,
128
} TCGReg;
129
130
-/* used for function call generation */
131
-#define TCG_TARGET_STACK_ALIGN 16
132
-#if _MIPS_SIM == _ABIO32
133
-# define TCG_TARGET_CALL_STACK_OFFSET 16
134
-# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
135
-# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
136
-#else
137
-# define TCG_TARGET_CALL_STACK_OFFSET 0
138
-# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
139
-# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
140
-#endif
141
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
142
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
143
-
144
/* MOVN/MOVZ instructions detection */
145
#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
146
defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
147
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
148
index XXXXXXX..XXXXXXX 100644
149
--- a/tcg/riscv/tcg-target.h
150
+++ b/tcg/riscv/tcg-target.h
151
@@ -XXX,XX +XXX,XX @@ typedef enum {
152
TCG_REG_TMP2 = TCG_REG_T4,
153
} TCGReg;
154
155
-/* used for function call generation */
156
-#define TCG_REG_CALL_STACK TCG_REG_SP
157
-#define TCG_TARGET_STACK_ALIGN 16
158
-#define TCG_TARGET_CALL_STACK_OFFSET 0
159
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
160
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
161
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
162
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
163
-
164
/* optional instructions */
165
#define TCG_TARGET_HAS_negsetcond_i32 1
166
#define TCG_TARGET_HAS_div_i32 1
167
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
168
index XXXXXXX..XXXXXXX 100644
169
--- a/tcg/s390x/tcg-target.h
170
+++ b/tcg/s390x/tcg-target.h
171
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
172
#define TCG_TARGET_HAS_cmpsel_vec 1
173
#define TCG_TARGET_HAS_tst_vec 0
174
175
-/* used for function call generation */
176
-#define TCG_TARGET_STACK_ALIGN        8
177
-#define TCG_TARGET_CALL_STACK_OFFSET    160
178
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
179
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
180
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
181
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
182
-
183
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
184
#define TCG_TARGET_NEED_LDST_LABELS
185
#define TCG_TARGET_NEED_POOL_LABELS
186
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
187
index XXXXXXX..XXXXXXX 100644
188
--- a/tcg/sparc64/tcg-target.h
189
+++ b/tcg/sparc64/tcg-target.h
190
@@ -XXX,XX +XXX,XX @@ typedef enum {
191
TCG_REG_I7,
192
} TCGReg;
193
194
-/* used for function call generation */
195
-#define TCG_REG_CALL_STACK TCG_REG_O6
196
-
197
-#define TCG_TARGET_STACK_BIAS 2047
198
-#define TCG_TARGET_STACK_ALIGN 16
199
-#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS)
200
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
201
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
202
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
203
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
204
-
205
#if defined(__VIS__) && __VIS__ >= 0x300
206
#define use_vis3_instructions 1
207
#else
208
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
209
index XXXXXXX..XXXXXXX 100644
210
--- a/tcg/tci/tcg-target.h
211
+++ b/tcg/tci/tcg-target.h
212
@@ -XXX,XX +XXX,XX @@ typedef enum {
213
TCG_REG_CALL_STACK = TCG_REG_R15,
214
} TCGReg;
215
216
-/* Used for function call generation. */
217
-#define TCG_TARGET_CALL_STACK_OFFSET 0
218
-#define TCG_TARGET_STACK_ALIGN 8
219
-#if TCG_TARGET_REG_BITS == 32
220
-# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN
221
-# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
222
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
223
-#else
224
-# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
225
-# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
226
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
227
-#endif
228
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
229
-
230
#define HAVE_TCG_QEMU_TB_EXEC
231
#define TCG_TARGET_NEED_POOL_LABELS
232
233
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
234
index XXXXXXX..XXXXXXX 100644
235
--- a/tcg/aarch64/tcg-target.c.inc
236
+++ b/tcg/aarch64/tcg-target.c.inc
237
@@ -XXX,XX +XXX,XX @@
238
#include "../tcg-pool.c.inc"
239
#include "qemu/bitops.h"
240
241
+/* Used for function call generation. */
242
+#define TCG_REG_CALL_STACK TCG_REG_SP
243
+#define TCG_TARGET_STACK_ALIGN 16
244
+#define TCG_TARGET_CALL_STACK_OFFSET 0
245
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
246
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
247
+#ifdef CONFIG_DARWIN
248
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
249
+#else
250
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
251
+#endif
252
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
253
+
254
/* We're going to re-use TCGType in setting of the SF bit, which controls
255
the size of the operation performed. If we know the values match, it
256
makes things much cleaner. */
257
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
258
index XXXXXXX..XXXXXXX 100644
259
--- a/tcg/arm/tcg-target.c.inc
260
+++ b/tcg/arm/tcg-target.c.inc
261
@@ -XXX,XX +XXX,XX @@ bool use_idiv_instructions;
262
bool use_neon_instructions;
263
#endif
264
265
+/* Used for function call generation. */
266
+#define TCG_TARGET_STACK_ALIGN 8
267
+#define TCG_TARGET_CALL_STACK_OFFSET 0
268
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
269
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
270
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
271
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
272
+
273
#ifdef CONFIG_DEBUG_TCG
274
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
275
"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
276
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
277
index XXXXXXX..XXXXXXX 100644
278
--- a/tcg/i386/tcg-target.c.inc
279
+++ b/tcg/i386/tcg-target.c.inc
280
@@ -XXX,XX +XXX,XX @@
281
#include "../tcg-ldst.c.inc"
282
#include "../tcg-pool.c.inc"
283
284
+/* Used for function call generation. */
285
+#define TCG_TARGET_STACK_ALIGN 16
286
+#if defined(_WIN64)
287
+#define TCG_TARGET_CALL_STACK_OFFSET 32
288
+#else
289
+#define TCG_TARGET_CALL_STACK_OFFSET 0
290
+#endif
291
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
292
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
293
+#if defined(_WIN64)
294
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
295
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_VEC
296
+#elif TCG_TARGET_REG_BITS == 64
297
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
298
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
299
+#else
300
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
301
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
302
+#endif
303
+
304
#ifdef CONFIG_DEBUG_TCG
305
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
306
#if TCG_TARGET_REG_BITS == 64
307
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
308
index XXXXXXX..XXXXXXX 100644
309
--- a/tcg/loongarch64/tcg-target.c.inc
310
+++ b/tcg/loongarch64/tcg-target.c.inc
311
@@ -XXX,XX +XXX,XX @@
312
#include "../tcg-ldst.c.inc"
313
#include <asm/hwcap.h>
314
315
+/* used for function call generation */
316
+#define TCG_REG_CALL_STACK TCG_REG_SP
317
+#define TCG_TARGET_STACK_ALIGN 16
318
+#define TCG_TARGET_CALL_STACK_OFFSET 0
319
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
320
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
321
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
322
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
323
+
324
#ifdef CONFIG_DEBUG_TCG
325
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
326
"zero",
327
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
328
index XXXXXXX..XXXXXXX 100644
329
--- a/tcg/mips/tcg-target.c.inc
330
+++ b/tcg/mips/tcg-target.c.inc
331
@@ -XXX,XX +XXX,XX @@
332
#include "../tcg-ldst.c.inc"
333
#include "../tcg-pool.c.inc"
334
335
+/* used for function call generation */
336
+#define TCG_TARGET_STACK_ALIGN 16
337
+#if _MIPS_SIM == _ABIO32
338
+# define TCG_TARGET_CALL_STACK_OFFSET 16
339
+# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
340
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
341
+#else
342
+# define TCG_TARGET_CALL_STACK_OFFSET 0
343
+# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
344
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
345
+#endif
346
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
347
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
348
+
349
#if TCG_TARGET_REG_BITS == 32
350
# define LO_OFF (HOST_BIG_ENDIAN * 4)
351
# define HI_OFF (4 - LO_OFF)
352
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
353
index XXXXXXX..XXXXXXX 100644
354
--- a/tcg/riscv/tcg-target.c.inc
355
+++ b/tcg/riscv/tcg-target.c.inc
356
@@ -XXX,XX +XXX,XX @@
357
#include "../tcg-ldst.c.inc"
358
#include "../tcg-pool.c.inc"
359
360
+/* Used for function call generation. */
361
+#define TCG_REG_CALL_STACK TCG_REG_SP
362
+#define TCG_TARGET_STACK_ALIGN 16
363
+#define TCG_TARGET_CALL_STACK_OFFSET 0
364
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
365
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
366
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
367
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
368
+
369
#ifdef CONFIG_DEBUG_TCG
370
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
371
"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2",
372
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
373
index XXXXXXX..XXXXXXX 100644
374
--- a/tcg/s390x/tcg-target.c.inc
375
+++ b/tcg/s390x/tcg-target.c.inc
376
@@ -XXX,XX +XXX,XX @@
377
#include "../tcg-pool.c.inc"
378
#include "elf.h"
379
380
+/* Used for function call generation. */
381
+#define TCG_TARGET_STACK_ALIGN 8
382
+#define TCG_TARGET_CALL_STACK_OFFSET 160
383
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
384
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
385
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
386
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
387
+
388
#define TCG_CT_CONST_S16 (1 << 8)
389
#define TCG_CT_CONST_S32 (1 << 9)
390
#define TCG_CT_CONST_U32 (1 << 10)
391
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
392
index XXXXXXX..XXXXXXX 100644
393
--- a/tcg/sparc64/tcg-target.c.inc
394
+++ b/tcg/sparc64/tcg-target.c.inc
395
@@ -XXX,XX +XXX,XX @@
396
#include "../tcg-ldst.c.inc"
397
#include "../tcg-pool.c.inc"
398
399
+/* Used for function call generation. */
400
+#define TCG_REG_CALL_STACK TCG_REG_O6
401
+#define TCG_TARGET_STACK_BIAS 2047
402
+#define TCG_TARGET_STACK_ALIGN 16
403
+#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6 * 8 + TCG_TARGET_STACK_BIAS)
404
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
405
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
406
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
407
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
408
+
409
#ifdef CONFIG_DEBUG_TCG
410
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
411
"%g0",
412
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
413
index XXXXXXX..XXXXXXX 100644
414
--- a/tcg/tci/tcg-target.c.inc
415
+++ b/tcg/tci/tcg-target.c.inc
416
@@ -XXX,XX +XXX,XX @@
417
418
#include "../tcg-pool.c.inc"
419
420
+/* Used for function call generation. */
421
+#define TCG_TARGET_CALL_STACK_OFFSET 0
422
+#define TCG_TARGET_STACK_ALIGN 8
423
+#if TCG_TARGET_REG_BITS == 32
424
+# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN
425
+# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
426
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
427
+#else
428
+# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
429
+# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
430
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
431
+#endif
432
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
433
+
434
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
26
{
435
{
27
TCGOpcode opc = op->opc;
436
switch (op) {
28
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
29
break;
30
}
31
32
- /* Propagate constants through copy operations and do constant
33
- folding. Constants will be substituted to arguments by register
34
- allocator where needed and possible. Also detect copies. */
35
+ /*
36
+ * Process each opcode.
37
+ * Sorted alphabetically by opcode as much as possible.
38
+ */
39
switch (opc) {
40
- CASE_OP_32_64_VEC(mov):
41
- done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
42
- break;
43
-
44
- default:
45
- break;
46
-
47
- /* ---------------------------------------------------------- */
48
- /* Sorted alphabetically by opcode as much as possible. */
49
-
50
CASE_OP_32_64_VEC(add):
51
done = fold_add(&ctx, op);
52
break;
53
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
54
case INDEX_op_mb:
55
done = fold_mb(&ctx, op);
56
break;
57
+ CASE_OP_32_64_VEC(mov):
58
+ done = fold_mov(&ctx, op);
59
+ break;
60
CASE_OP_32_64(movcond):
61
done = fold_movcond(&ctx, op);
62
break;
63
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
64
CASE_OP_32_64_VEC(xor):
65
done = fold_xor(&ctx, op);
66
break;
67
+ default:
68
+ break;
69
}
70
71
if (!done) {
72
--
437
--
73
2.25.1
438
2.43.0
74
439
75
440
diff view generated by jsdifflib
1
This will expose the variable to subroutines that
1
In the replacement, drop the TCGType - TCG_TYPE_V64 adjustment,
2
will be broken out of tcg_optimize.
2
except for the call to tcg_out_vec_op. Pass type to tcg_gen_op[1-6],
3
so that all integer opcodes gain the type.
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 11 ++++++-----
8
include/tcg/tcg.h | 2 +-
10
1 file changed, 6 insertions(+), 5 deletions(-)
9
tcg/tcg-internal.h | 13 ++---
10
tcg/optimize.c | 10 +---
11
tcg/tcg-op-ldst.c | 26 ++++++----
12
tcg/tcg-op-vec.c | 8 +--
13
tcg/tcg-op.c | 113 +++++++++++++++++++++++------------------
14
tcg/tcg.c | 11 ++--
15
docs/devel/tcg-ops.rst | 15 +++---
16
8 files changed, 105 insertions(+), 93 deletions(-)
11
17
18
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/tcg/tcg.h
21
+++ b/include/tcg/tcg.h
22
@@ -XXX,XX +XXX,XX @@ struct TCGOp {
23
#define TCGOP_CALLI(X) (X)->param1
24
#define TCGOP_CALLO(X) (X)->param2
25
26
-#define TCGOP_VECL(X) (X)->param1
27
+#define TCGOP_TYPE(X) (X)->param1
28
#define TCGOP_VECE(X) (X)->param2
29
30
/* Make sure operands fit in the bitfields above. */
31
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/tcg-internal.h
34
+++ b/tcg/tcg-internal.h
35
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind);
36
*/
37
TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
38
39
-TCGOp *tcg_gen_op1(TCGOpcode, TCGArg);
40
-TCGOp *tcg_gen_op2(TCGOpcode, TCGArg, TCGArg);
41
-TCGOp *tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg);
42
-TCGOp *tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
43
-TCGOp *tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
44
-TCGOp *tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
45
+TCGOp *tcg_gen_op1(TCGOpcode, TCGType, TCGArg);
46
+TCGOp *tcg_gen_op2(TCGOpcode, TCGType, TCGArg, TCGArg);
47
+TCGOp *tcg_gen_op3(TCGOpcode, TCGType, TCGArg, TCGArg, TCGArg);
48
+TCGOp *tcg_gen_op4(TCGOpcode, TCGType, TCGArg, TCGArg, TCGArg, TCGArg);
49
+TCGOp *tcg_gen_op5(TCGOpcode, TCGType, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
50
+TCGOp *tcg_gen_op6(TCGOpcode, TCGType, TCGArg, TCGArg,
51
+ TCGArg, TCGArg, TCGArg, TCGArg);
52
53
void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg);
54
void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg);
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
55
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
57
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
58
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
59
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
17
60
case TCG_TYPE_V64:
18
typedef struct OptContext {
61
case TCG_TYPE_V128:
19
TCGContext *tcg;
62
case TCG_TYPE_V256:
20
+ TCGOp *prev_mb;
63
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
21
TCGTempSet temps_used;
64
+ /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */
22
} OptContext;
65
new_op = INDEX_op_mov_vec;
23
66
break;
24
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
67
default:
25
void tcg_optimize(TCGContext *s)
26
{
27
int nb_temps, nb_globals, i;
28
- TCGOp *op, *op_next, *prev_mb = NULL;
29
+ TCGOp *op, *op_next;
30
OptContext ctx = { .tcg = s };
31
32
/* Array VALS has an element for each temp.
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
70
71
/* Pre-compute the type of the operation. */
72
- if (def->flags & TCG_OPF_VECTOR) {
73
- ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
74
- } else if (def->flags & TCG_OPF_64BIT) {
75
- ctx.type = TCG_TYPE_I64;
76
- } else {
77
- ctx.type = TCG_TYPE_I32;
78
- }
79
+ ctx.type = TCGOP_TYPE(op);
80
81
/*
82
* Process each opcode.
83
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/tcg/tcg-op-ldst.c
86
+++ b/tcg/tcg-op-ldst.c
87
@@ -XXX,XX +XXX,XX @@ static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
88
return op;
89
}
90
91
-static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh,
92
+static void gen_ldst(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh,
93
TCGTemp *addr, MemOpIdx oi)
94
{
95
if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
96
if (vh) {
97
- tcg_gen_op4(opc, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi);
98
+ tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh),
99
+ temp_arg(addr), oi);
100
} else {
101
- tcg_gen_op3(opc, temp_arg(vl), temp_arg(addr), oi);
102
+ tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi);
34
}
103
}
35
104
} else {
36
/* Eliminate duplicate and redundant fence instructions. */
105
/* See TCGV_LOW/HIGH. */
37
- if (prev_mb) {
106
@@ -XXX,XX +XXX,XX @@ static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh,
38
+ if (ctx.prev_mb) {
107
TCGTemp *ah = addr + !HOST_BIG_ENDIAN;
39
switch (opc) {
108
40
case INDEX_op_mb:
109
if (vh) {
41
/* Merge two barriers of the same type into one,
110
- tcg_gen_op5(opc, temp_arg(vl), temp_arg(vh),
42
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
+ tcg_gen_op5(opc, type, temp_arg(vl), temp_arg(vh),
43
* barrier. This is stricter than specified but for
112
temp_arg(al), temp_arg(ah), oi);
44
* the purposes of TCG is better than not optimizing.
113
} else {
45
*/
114
- tcg_gen_op4(opc, temp_arg(vl), temp_arg(al), temp_arg(ah), oi);
46
- prev_mb->args[0] |= op->args[0];
115
+ tcg_gen_op4(opc, type, temp_arg(vl),
47
+ ctx.prev_mb->args[0] |= op->args[0];
116
+ temp_arg(al), temp_arg(ah), oi);
48
tcg_op_remove(s, op);
49
break;
50
51
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
52
case INDEX_op_qemu_st_i64:
53
case INDEX_op_call:
54
/* Opcodes that touch guest memory stop the optimization. */
55
- prev_mb = NULL;
56
+ ctx.prev_mb = NULL;
57
break;
58
}
59
} else if (opc == INDEX_op_mb) {
60
- prev_mb = op;
61
+ ctx.prev_mb = op;
62
}
117
}
63
}
118
}
64
}
119
}
120
@@ -XXX,XX +XXX,XX @@ static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
121
if (TCG_TARGET_REG_BITS == 32) {
122
TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
123
TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
124
- gen_ldst(opc, vl, vh, addr, oi);
125
+ gen_ldst(opc, TCG_TYPE_I64, vl, vh, addr, oi);
126
} else {
127
- gen_ldst(opc, tcgv_i64_temp(v), NULL, addr, oi);
128
+ gen_ldst(opc, TCG_TYPE_I64, tcgv_i64_temp(v), NULL, addr, oi);
129
}
130
}
131
132
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
133
} else {
134
opc = INDEX_op_qemu_ld_a64_i32;
135
}
136
- gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
137
+ gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi);
138
plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi,
139
QEMU_PLUGIN_MEM_R);
140
141
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
142
opc = INDEX_op_qemu_st_a64_i32;
143
}
144
}
145
- gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
146
+ gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi);
147
plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
148
149
if (swap) {
150
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
151
} else {
152
opc = INDEX_op_qemu_ld_a64_i128;
153
}
154
- gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
155
+ gen_ldst(opc, TCG_TYPE_I128, tcgv_i64_temp(lo),
156
+ tcgv_i64_temp(hi), addr, oi);
157
158
if (need_bswap) {
159
tcg_gen_bswap64_i64(lo, lo);
160
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
161
} else {
162
opc = INDEX_op_qemu_st_a64_i128;
163
}
164
- gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
165
+ gen_ldst(opc, TCG_TYPE_I128, tcgv_i64_temp(lo),
166
+ tcgv_i64_temp(hi), addr, oi);
167
168
if (need_bswap) {
169
tcg_temp_free_i64(lo);
170
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
171
index XXXXXXX..XXXXXXX 100644
172
--- a/tcg/tcg-op-vec.c
173
+++ b/tcg/tcg-op-vec.c
174
@@ -XXX,XX +XXX,XX @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list,
175
void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
176
{
177
TCGOp *op = tcg_emit_op(opc, 2);
178
- TCGOP_VECL(op) = type - TCG_TYPE_V64;
179
+ TCGOP_TYPE(op) = type;
180
TCGOP_VECE(op) = vece;
181
op->args[0] = r;
182
op->args[1] = a;
183
@@ -XXX,XX +XXX,XX @@ void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
184
TCGArg r, TCGArg a, TCGArg b)
185
{
186
TCGOp *op = tcg_emit_op(opc, 3);
187
- TCGOP_VECL(op) = type - TCG_TYPE_V64;
188
+ TCGOP_TYPE(op) = type;
189
TCGOP_VECE(op) = vece;
190
op->args[0] = r;
191
op->args[1] = a;
192
@@ -XXX,XX +XXX,XX @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
193
TCGArg r, TCGArg a, TCGArg b, TCGArg c)
194
{
195
TCGOp *op = tcg_emit_op(opc, 4);
196
- TCGOP_VECL(op) = type - TCG_TYPE_V64;
197
+ TCGOP_TYPE(op) = type;
198
TCGOP_VECE(op) = vece;
199
op->args[0] = r;
200
op->args[1] = a;
201
@@ -XXX,XX +XXX,XX @@ void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
202
TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
203
{
204
TCGOp *op = tcg_emit_op(opc, 6);
205
- TCGOP_VECL(op) = type - TCG_TYPE_V64;
206
+ TCGOP_TYPE(op) = type;
207
TCGOP_VECE(op) = vece;
208
op->args[0] = r;
209
op->args[1] = a;
210
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
211
index XXXXXXX..XXXXXXX 100644
212
--- a/tcg/tcg-op.c
213
+++ b/tcg/tcg-op.c
214
@@ -XXX,XX +XXX,XX @@
215
*/
216
#define NI __attribute__((noinline))
217
218
-TCGOp * NI tcg_gen_op1(TCGOpcode opc, TCGArg a1)
219
+TCGOp * NI tcg_gen_op1(TCGOpcode opc, TCGType type, TCGArg a1)
220
{
221
TCGOp *op = tcg_emit_op(opc, 1);
222
+ TCGOP_TYPE(op) = type;
223
op->args[0] = a1;
224
return op;
225
}
226
227
-TCGOp * NI tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
228
+TCGOp * NI tcg_gen_op2(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2)
229
{
230
TCGOp *op = tcg_emit_op(opc, 2);
231
+ TCGOP_TYPE(op) = type;
232
op->args[0] = a1;
233
op->args[1] = a2;
234
return op;
235
}
236
237
-TCGOp * NI tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
238
+TCGOp * NI tcg_gen_op3(TCGOpcode opc, TCGType type, TCGArg a1,
239
+ TCGArg a2, TCGArg a3)
240
{
241
TCGOp *op = tcg_emit_op(opc, 3);
242
+ TCGOP_TYPE(op) = type;
243
op->args[0] = a1;
244
op->args[1] = a2;
245
op->args[2] = a3;
246
return op;
247
}
248
249
-TCGOp * NI tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2,
250
+TCGOp * NI tcg_gen_op4(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2,
251
TCGArg a3, TCGArg a4)
252
{
253
TCGOp *op = tcg_emit_op(opc, 4);
254
+ TCGOP_TYPE(op) = type;
255
op->args[0] = a1;
256
op->args[1] = a2;
257
op->args[2] = a3;
258
@@ -XXX,XX +XXX,XX @@ TCGOp * NI tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2,
259
return op;
260
}
261
262
-TCGOp * NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2,
263
+TCGOp * NI tcg_gen_op5(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2,
264
TCGArg a3, TCGArg a4, TCGArg a5)
265
{
266
TCGOp *op = tcg_emit_op(opc, 5);
267
+ TCGOP_TYPE(op) = type;
268
op->args[0] = a1;
269
op->args[1] = a2;
270
op->args[2] = a3;
271
@@ -XXX,XX +XXX,XX @@ TCGOp * NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2,
272
return op;
273
}
274
275
-TCGOp * NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
276
- TCGArg a4, TCGArg a5, TCGArg a6)
277
+TCGOp * NI tcg_gen_op6(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2,
278
+ TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6)
279
{
280
TCGOp *op = tcg_emit_op(opc, 6);
281
+ TCGOP_TYPE(op) = type;
282
op->args[0] = a1;
283
op->args[1] = a2;
284
op->args[2] = a3;
285
@@ -XXX,XX +XXX,XX @@ TCGOp * NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
286
# define DNI
287
#endif
288
289
-static void DNI tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1)
290
+static void DNI tcg_gen_op1_i32(TCGOpcode opc, TCGType type, TCGv_i32 a1)
291
{
292
- tcg_gen_op1(opc, tcgv_i32_arg(a1));
293
+ tcg_gen_op1(opc, type, tcgv_i32_arg(a1));
294
}
295
296
-static void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1)
297
+static void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGType type, TCGv_i64 a1)
298
{
299
- tcg_gen_op1(opc, tcgv_i64_arg(a1));
300
+ tcg_gen_op1(opc, type, tcgv_i64_arg(a1));
301
}
302
303
-static TCGOp * DNI tcg_gen_op1i(TCGOpcode opc, TCGArg a1)
304
+static TCGOp * DNI tcg_gen_op1i(TCGOpcode opc, TCGType type, TCGArg a1)
305
{
306
- return tcg_gen_op1(opc, a1);
307
+ return tcg_gen_op1(opc, type, a1);
308
}
309
310
static void DNI tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2)
311
{
312
- tcg_gen_op2(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2));
313
+ tcg_gen_op2(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2));
314
}
315
316
static void DNI tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2)
317
{
318
- tcg_gen_op2(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2));
319
+ tcg_gen_op2(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2));
320
}
321
322
static void DNI tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1,
323
TCGv_i32 a2, TCGv_i32 a3)
324
{
325
- tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3));
326
+ tcg_gen_op3(opc, TCG_TYPE_I32, tcgv_i32_arg(a1),
327
+ tcgv_i32_arg(a2), tcgv_i32_arg(a3));
328
}
329
330
static void DNI tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1,
331
TCGv_i64 a2, TCGv_i64 a3)
332
{
333
- tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3));
334
+ tcg_gen_op3(opc, TCG_TYPE_I64, tcgv_i64_arg(a1),
335
+ tcgv_i64_arg(a2), tcgv_i64_arg(a3));
336
}
337
338
static void DNI tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1,
339
TCGv_i32 a2, TCGArg a3)
340
{
341
- tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3);
342
+ tcg_gen_op3(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3);
343
}
344
345
static void DNI tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1,
346
TCGv_i64 a2, TCGArg a3)
347
{
348
- tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3);
349
+ tcg_gen_op3(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3);
350
}
351
352
static void DNI tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val,
353
TCGv_ptr base, TCGArg offset)
354
{
355
- tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset);
356
+ tcg_gen_op3(opc, TCG_TYPE_I32, tcgv_i32_arg(val),
357
+ tcgv_ptr_arg(base), offset);
358
}
359
360
static void DNI tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val,
361
TCGv_ptr base, TCGArg offset)
362
{
363
- tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_ptr_arg(base), offset);
364
+ tcg_gen_op3(opc, TCG_TYPE_I64, tcgv_i64_arg(val),
365
+ tcgv_ptr_arg(base), offset);
366
}
367
368
static void DNI tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
369
TCGv_i32 a3, TCGv_i32 a4)
370
{
371
- tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
372
+ tcg_gen_op4(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
373
tcgv_i32_arg(a3), tcgv_i32_arg(a4));
374
}
375
376
 static void DNI tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                 TCGv_i64 a3, TCGv_i64 a4)
 {
-    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op4(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), tcgv_i64_arg(a4));
 }
 
 static void DNI tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                  TCGv_i32 a3, TCGArg a4)
 {
-    tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    tcg_gen_op4(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                 tcgv_i32_arg(a3), a4);
 }
 
 static void DNI tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                  TCGv_i64 a3, TCGArg a4)
 {
-    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op4(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), a4);
 }
 
 static TCGOp * DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                      TCGArg a3, TCGArg a4)
 {
-    return tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
+    return tcg_gen_op4(opc, TCG_TYPE_I32,
+                       tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
 }
 
 static TCGOp * DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                      TCGArg a3, TCGArg a4)
 {
-    return tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4);
+    return tcg_gen_op4(opc, TCG_TYPE_I64,
+                       tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4);
 }
 
 static void DNI tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                 TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5)
 {
-    tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    tcg_gen_op5(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                 tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5));
 }
 
 static void DNI tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                 TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5)
 {
-    tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op5(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5));
 }
 
 static void DNI tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                   TCGv_i32 a3, TCGArg a4, TCGArg a5)
 {
-    tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    tcg_gen_op5(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                 tcgv_i32_arg(a3), a4, a5);
 }
 
 static void DNI tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                   TCGv_i64 a3, TCGArg a4, TCGArg a5)
 {
-    tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op5(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), a4, a5);
 }
 
@@ -XXX,XX +XXX,XX @@ static void DNI tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                 TCGv_i32 a3, TCGv_i32 a4,
                                 TCGv_i32 a5, TCGv_i32 a6)
 {
-    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                 tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5),
                 tcgv_i32_arg(a6));
 }
@@ -XXX,XX +XXX,XX @@ static void DNI tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                 TCGv_i64 a3, TCGv_i64 a4,
                                 TCGv_i64 a5, TCGv_i64 a6)
 {
-    tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op6(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5),
                 tcgv_i64_arg(a6));
 }
@@ -XXX,XX +XXX,XX @@ static void DNI tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                  TCGv_i32 a3, TCGv_i32 a4,
                                  TCGv_i32 a5, TCGArg a6)
 {
-    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                 tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), a6);
 }
 
@@ -XXX,XX +XXX,XX @@ static void DNI tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                  TCGv_i64 a3, TCGv_i64 a4,
                                  TCGv_i64 a5, TCGArg a6)
 {
-    tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op6(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6);
 }
 
@@ -XXX,XX +XXX,XX @@ static TCGOp * DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                      TCGv_i32 a3, TCGv_i32 a4,
                                      TCGArg a5, TCGArg a6)
 {
-    return tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    return tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                        tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6);
 }
 
@@ -XXX,XX +XXX,XX @@ static TCGOp * DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
 void gen_set_label(TCGLabel *l)
 {
     l->present = 1;
-    tcg_gen_op1(INDEX_op_set_label, label_arg(l));
+    tcg_gen_op1(INDEX_op_set_label, 0, label_arg(l));
 }
 
 static void add_as_label_use(TCGLabel *l, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ static void add_as_label_use(TCGLabel *l, TCGOp *op)
 
 void tcg_gen_br(TCGLabel *l)
 {
-    add_as_label_use(l, tcg_gen_op1(INDEX_op_br, label_arg(l)));
+    add_as_label_use(l, tcg_gen_op1(INDEX_op_br, 0, label_arg(l)));
 }
 
 void tcg_gen_mb(TCGBar mb_type)
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mb(TCGBar mb_type)
 #endif
 
     if (parallel) {
-        tcg_gen_op1(INDEX_op_mb, mb_type);
+        tcg_gen_op1(INDEX_op_mb, 0, mb_type);
     }
 }
 
 void tcg_gen_plugin_cb(unsigned from)
 {
-    tcg_gen_op1(INDEX_op_plugin_cb, from);
+    tcg_gen_op1(INDEX_op_plugin_cb, 0, from);
 }
 
 void tcg_gen_plugin_mem_cb(TCGv_i64 addr, unsigned meminfo)
 {
-    tcg_gen_op2(INDEX_op_plugin_mem_cb, tcgv_i64_arg(addr), meminfo);
+    tcg_gen_op2(INDEX_op_plugin_mem_cb, 0, tcgv_i64_arg(addr), meminfo);
 }
 
 /* 32 bit ops */
 
 void tcg_gen_discard_i32(TCGv_i32 arg)
 {
-    tcg_gen_op1_i32(INDEX_op_discard, arg);
+    tcg_gen_op1_i32(INDEX_op_discard, TCG_TYPE_I32, arg);
 }
 
 void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg)
@@ -XXX,XX +XXX,XX @@ void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset)
 void tcg_gen_discard_i64(TCGv_i64 arg)
 {
     if (TCG_TARGET_REG_BITS == 64) {
-        tcg_gen_op1_i64(INDEX_op_discard, arg);
+        tcg_gen_op1_i64(INDEX_op_discard, TCG_TYPE_I64, arg);
     } else {
         tcg_gen_discard_i32(TCGV_LOW(arg));
         tcg_gen_discard_i32(TCGV_HIGH(arg));
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
     if (TCG_TARGET_REG_BITS == 32) {
         tcg_gen_mov_i32(ret, TCGV_LOW(arg));
     } else if (TCG_TARGET_HAS_extr_i64_i32) {
-        tcg_gen_op2(INDEX_op_extrl_i64_i32,
+        tcg_gen_op2(INDEX_op_extrl_i64_i32, TCG_TYPE_I32,
                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
     } else {
         tcg_gen_mov_i32(ret, (TCGv_i32)arg);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
     if (TCG_TARGET_REG_BITS == 32) {
         tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
     } else if (TCG_TARGET_HAS_extr_i64_i32) {
-        tcg_gen_op2(INDEX_op_extrh_i64_i32,
+        tcg_gen_op2(INDEX_op_extrh_i64_i32, TCG_TYPE_I32,
                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
     } else {
         TCGv_i64 t = tcg_temp_ebb_new_i64();
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
     } else {
-        tcg_gen_op2(INDEX_op_extu_i32_i64,
+        tcg_gen_op2(INDEX_op_extu_i32_i64, TCG_TYPE_I64,
                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
     }
 }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
     } else {
-        tcg_gen_op2(INDEX_op_ext_i32_i64,
+        tcg_gen_op2(INDEX_op_ext_i32_i64, TCG_TYPE_I64,
                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
     }
 }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
     }
 
-    tcg_gen_op1i(INDEX_op_exit_tb, val);
+    tcg_gen_op1i(INDEX_op_exit_tb, 0, val);
 }
 
 void tcg_gen_goto_tb(unsigned idx)
@@ -XXX,XX +XXX,XX @@ void tcg_gen_goto_tb(unsigned idx)
     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
 #endif
     plugin_gen_disable_mem_helpers();
-    tcg_gen_op1i(INDEX_op_goto_tb, idx);
+    tcg_gen_op1i(INDEX_op_goto_tb, 0, idx);
 }
 
 void tcg_gen_lookup_and_goto_ptr(void)
@@ -XXX,XX +XXX,XX @@ void tcg_gen_lookup_and_goto_ptr(void)
     plugin_gen_disable_mem_helpers();
     ptr = tcg_temp_ebb_new_ptr();
     gen_helper_lookup_tb_ptr(ptr, tcg_env);
-    tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
+    tcg_gen_op1i(INDEX_op_goto_ptr, TCG_TYPE_PTR, tcgv_ptr_arg(ptr));
     tcg_temp_free_ptr(ptr);
 }
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
             nb_cargs = def->nb_cargs;
 
             if (def->flags & TCG_OPF_VECTOR) {
-                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
+                col += ne_fprintf(f, "v%d,e%d,",
+                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                   8 << TCGOP_VECE(op));
             }
 
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
 
     itype = its->type;
     vece = TCGOP_VECE(op);
-    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
+    vtype = TCGOP_TYPE(op);
 
     if (its->val_type == TEMP_VAL_CONST) {
         /* Propagate constant via movi -> dupi.  */
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
         break;
     default:
         if (def->flags & TCG_OPF_VECTOR) {
-            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
-                           new_args, const_args);
+            tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64,
+                           TCGOP_VECE(op), new_args, const_args);
         } else {
             tcg_out_op(s, op->opc, new_args, const_args);
         }
@@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
 {
     const TCGLifeData arg_life = op->life;
     TCGTemp *ots, *itsl, *itsh;
-    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
+    TCGType vtype = TCGOP_TYPE(op);
 
     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/devel/tcg-ops.rst
+++ b/docs/devel/tcg-ops.rst
@@ -XXX,XX +XXX,XX @@ QEMU specific operations
 Host vector operations
 ----------------------
 
-All of the vector ops have two parameters, ``TCGOP_VECL`` & ``TCGOP_VECE``.
-The former specifies the length of the vector in log2 64-bit units; the
-latter specifies the length of the element (if applicable) in log2 8-bit units.
-E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
+All of the vector ops have two parameters, ``TCGOP_TYPE`` & ``TCGOP_VECE``.
+The former specifies the length of the vector as a TCGType; the latter
+specifies the length of the element (if applicable) in log2 8-bit units.
 
 .. list-table::
 
@@ -XXX,XX +XXX,XX @@ E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
 
    * - dup_vec *v0*, *r1*
 
-     - | Duplicate the low N bits of *r1* into VECL/VECE copies across *v0*.
+     - | Duplicate the low N bits of *r1* into TYPE/VECE copies across *v0*.
 
    * - dupi_vec *v0*, *c*
 
@@ -XXX,XX +XXX,XX @@ E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
 
    * - dup2_vec *v0*, *r1*, *r2*
 
-     - | Duplicate *r2*:*r1* into VECL/64 copies across *v0*.  This opcode is
+     - | Duplicate *r2*:*r1* into TYPE/64 copies across *v0*.  This opcode is
          only present for 32-bit hosts.
 
    * - add_vec *v0*, *v1*, *v2*
@@ -XXX,XX +XXX,XX @@ E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
 
       .. code-block:: c
 
-         for (i = 0; i < VECL/VECE; ++i) {
+         for (i = 0; i < TYPE/VECE; ++i) {
              v0[i] = v1[i] << s2;
          }
 
@@ -XXX,XX +XXX,XX @@ E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
 
       .. code-block:: c
 
-         for (i = 0; i < VECL/VECE; ++i) {
+         for (i = 0; i < TYPE/VECE; ++i) {
              v0[i] = v1[i] << v2[i];
          }
-- 
2.43.0

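As a worked example of the TYPE/VECE encoding documented in the tcg-ops.rst
hunk above (an illustrative sketch only, not part of the patch):
TCG_TYPE_V128 with VECE = 2 describes a 16-byte vector of 4-byte elements,
i.e. four i32 lanes.

    /* Sketch: lane count implied by TCGOP_TYPE and TCGOP_VECE,
     * assuming QEMU's tcg_type_size() helper. */
    static inline int vector_lanes(TCGType type, unsigned vece)
    {
        return tcg_type_size(type) / (1 << vece);   /* V128, e=2 -> 4 */
    }
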
These are not particularly useful outside of optimization passes.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h  | 4 ----
 tcg/tcg-internal.h | 5 +++++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_call7(void *func, TCGHelperInfo *, TCGTemp *ret,
 
 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs);
 void tcg_op_remove(TCGContext *s, TCGOp *op);
-TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op,
-                            TCGOpcode opc, unsigned nargs);
-TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op,
-                           TCGOpcode opc, unsigned nargs);
 
 /**
  * tcg_remove_ops_after:
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -XXX,XX +XXX,XX @@ void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg);
 void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e);
 
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op,
+                            TCGOpcode opc, unsigned nargs);
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op,
+                           TCGOpcode opc, unsigned nargs);
+
 #endif /* TCG_INTERNAL_H */
-- 
2.43.0

Simplify use within the optimizers by defaulting the
new opcode to the same type as the old opcode.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
                             TCGOpcode opc, unsigned nargs)
 {
     TCGOp *new_op = tcg_op_alloc(opc, nargs);
+
+    TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
     return new_op;
 }
@@ -XXX,XX +XXX,XX @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
                            TCGOpcode opc, unsigned nargs)
 {
     TCGOp *new_op = tcg_op_alloc(opc, nargs);
+
+    TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
     return new_op;
 }
-- 
2.43.0

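A caller's-eye sketch of the effect of the two hunks above (assumed usage,
not code from this series): a pass that splits an op in place no longer
needs to copy the type by hand.

    /* New ops inserted next to 'op' now inherit its type. */
    TCGOp *new_op = tcg_op_insert_after(s, op, INDEX_op_mov_i32, 2);
    /* TCGOP_TYPE(new_op) == TCGOP_TYPE(op) already holds here. */
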
To be used by some integer operations instead of,
or in addition to, a trailing constant argument.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ struct TCGOp {
 #define TCGOP_CALLO(X)    (X)->param2
 
 #define TCGOP_TYPE(X)     (X)->param1
+#define TCGOP_FLAGS(X)    (X)->param2
 #define TCGOP_VECE(X)     (X)->param2
 
 /* Make sure operands fit in the bitfields above. */
-- 
2.43.0

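To make the parameter packing above easier to see (commentary only,
reflecting the macros as they stand after this patch):

    /* struct TCGOp parameter fields after this patch:
     *   param1: TCGOP_CALLI for calls, TCGOP_TYPE otherwise
     *   param2: TCGOP_CALLO for calls, TCGOP_VECE for vector ops,
     *           TCGOP_FLAGS for the integer ops that will carry flags
     * The overlap is safe because the three uses are disjoint by opcode.
     */
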
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |  7 ++++++-
 tcg/tcg.c         | 11 +++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef struct TCGTargetOpDef {
     const char *args_ct_str[TCG_MAX_OP_ARGS];
 } TCGTargetOpDef;
 
-bool tcg_op_supported(TCGOpcode op);
+/*
+ * tcg_op_supported:
+ * Query if @op, for @type and @flags, is supported by the host
+ * on which we are currently executing.
+ */
+bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags);
 
 void tcg_gen_call0(void *func, TCGHelperInfo *, TCGTemp *ret);
 void tcg_gen_call1(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcgv_i32_temp(TCGv_i32 v)
 }
 #endif /* CONFIG_DEBUG_TCG */
 
-/* Return true if OP may appear in the opcode stream.
-   Test the runtime variable that controls each opcode. */
-bool tcg_op_supported(TCGOpcode op)
+/*
+ * Return true if OP may appear in the opcode stream with TYPE.
+ * Test the runtime variable that controls each opcode.
+ */
+bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
 {
     const bool have_vec
         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
             /* fall through */
         default:
             /* Sanity check that we've not introduced any unhandled opcodes. */
-            tcg_debug_assert(tcg_op_supported(opc));
+            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
+                                              TCGOP_FLAGS(op)));
             /* Note: in order to speed up the code, it would be much
                faster to have specialized register allocator functions for
                some common argument patterns */
-- 
2.43.0

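A sketch of the extended query from a code generator's point of view
(assumed usage; the conversions later in this series follow the same shape):

    /* Flags are 0 for opcodes that do not carry any. */
    if (tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0)) {
        tcg_gen_andc_i32(ret, a, b);        /* host has andc */
    } else {
        tcg_gen_not_i32(t, b);              /* fall back to not+and */
        tcg_gen_and_i32(ret, a, t);
    }
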
Pull the "op r, a, i => mov r, a" optimization into a function,
1
Rely on tcg-op-vec.c to expand the opcode if missing.
2
and use them in the outer-most logical operations.
3
2
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
tcg/optimize.c | 61 +++++++++++++++++++++-----------------------------
6
target/arm/tcg/translate-sve.c | 20 ++++----------------
8
1 file changed, 26 insertions(+), 35 deletions(-)
7
1 file changed, 4 insertions(+), 16 deletions(-)
9
8
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
11
--- a/target/arm/tcg/translate-sve.c
13
+++ b/tcg/optimize.c
12
+++ b/target/arm/tcg/translate-sve.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
13
@@ -XXX,XX +XXX,XX @@ static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
15
return false;
14
static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
15
TCGv_vec m, TCGv_vec k)
16
{
17
- if (TCG_TARGET_HAS_bitsel_vec) {
18
- tcg_gen_not_vec(vece, n, n);
19
- tcg_gen_bitsel_vec(vece, d, k, n, m);
20
- } else {
21
- tcg_gen_andc_vec(vece, n, k, n);
22
- tcg_gen_andc_vec(vece, m, m, k);
23
- tcg_gen_or_vec(vece, d, n, m);
24
- }
25
+ tcg_gen_not_vec(vece, n, n);
26
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
16
}
27
}
17
28
18
+/* If the binary operation has second argument @i, fold to identity. */
29
static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
19
+static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
30
@@ -XXX,XX +XXX,XX @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
20
+{
31
static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
21
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
32
TCGv_vec m, TCGv_vec k)
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
23
+ }
24
+ return false;
25
+}
26
+
27
/* If the binary operation has second argument @i, fold to NOT. */
28
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
29
{
33
{
30
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
34
- if (TCG_TARGET_HAS_bitsel_vec) {
31
35
- tcg_gen_not_vec(vece, m, m);
32
static bool fold_add(OptContext *ctx, TCGOp *op)
36
- tcg_gen_bitsel_vec(vece, d, k, n, m);
33
{
37
- } else {
34
- return fold_const2(ctx, op);
38
- tcg_gen_and_vec(vece, n, n, k);
35
+ if (fold_const2(ctx, op) ||
39
- tcg_gen_or_vec(vece, m, m, k);
36
+ fold_xi_to_x(ctx, op, 0)) {
40
- tcg_gen_orc_vec(vece, d, n, m);
37
+ return true;
41
- }
38
+ }
42
+ tcg_gen_not_vec(vece, m, m);
39
+ return false;
43
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
40
}
44
}
41
45
42
static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
46
static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
43
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
44
{
45
if (fold_const2(ctx, op) ||
46
fold_xi_to_i(ctx, op, 0) ||
47
+ fold_xi_to_x(ctx, op, -1) ||
48
fold_xx_to_x(ctx, op)) {
49
return true;
50
}
51
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
52
{
53
if (fold_const2(ctx, op) ||
54
fold_xx_to_i(ctx, op, 0) ||
55
+ fold_xi_to_x(ctx, op, 0) ||
56
fold_ix_to_not(ctx, op, -1)) {
57
return true;
58
}
59
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
60
static bool fold_eqv(OptContext *ctx, TCGOp *op)
61
{
62
if (fold_const2(ctx, op) ||
63
+ fold_xi_to_x(ctx, op, -1) ||
64
fold_xi_to_not(ctx, op, 0)) {
65
return true;
66
}
67
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
68
static bool fold_or(OptContext *ctx, TCGOp *op)
69
{
70
if (fold_const2(ctx, op) ||
71
+ fold_xi_to_x(ctx, op, 0) ||
72
fold_xx_to_x(ctx, op)) {
73
return true;
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
76
static bool fold_orc(OptContext *ctx, TCGOp *op)
77
{
78
if (fold_const2(ctx, op) ||
79
+ fold_xi_to_x(ctx, op, -1) ||
80
fold_ix_to_not(ctx, op, 0)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
84
85
static bool fold_shift(OptContext *ctx, TCGOp *op)
86
{
87
- return fold_const2(ctx, op);
88
+ if (fold_const2(ctx, op) ||
89
+ fold_xi_to_x(ctx, op, 0)) {
90
+ return true;
91
+ }
92
+ return false;
93
}
94
95
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
96
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
97
{
98
if (fold_const2(ctx, op) ||
99
fold_xx_to_i(ctx, op, 0) ||
100
+ fold_xi_to_x(ctx, op, 0) ||
101
fold_sub_to_neg(ctx, op)) {
102
return true;
103
}
104
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
105
{
106
if (fold_const2(ctx, op) ||
107
fold_xx_to_i(ctx, op, 0) ||
108
+ fold_xi_to_x(ctx, op, 0) ||
109
fold_xi_to_not(ctx, op, -1)) {
110
return true;
111
}
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
break;
114
}
115
116
- /* Simplify expression for "op r, a, const => mov r, a" cases */
117
- switch (opc) {
118
- CASE_OP_32_64_VEC(add):
119
- CASE_OP_32_64_VEC(sub):
120
- CASE_OP_32_64_VEC(or):
121
- CASE_OP_32_64_VEC(xor):
122
- CASE_OP_32_64_VEC(andc):
123
- CASE_OP_32_64(shl):
124
- CASE_OP_32_64(shr):
125
- CASE_OP_32_64(sar):
126
- CASE_OP_32_64(rotl):
127
- CASE_OP_32_64(rotr):
128
- if (!arg_is_const(op->args[1])
129
- && arg_is_const(op->args[2])
130
- && arg_info(op->args[2])->val == 0) {
131
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
132
- continue;
133
- }
134
- break;
135
- CASE_OP_32_64_VEC(and):
136
- CASE_OP_32_64_VEC(orc):
137
- CASE_OP_32_64(eqv):
138
- if (!arg_is_const(op->args[1])
139
- && arg_is_const(op->args[2])
140
- && arg_info(op->args[2])->val == -1) {
141
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
142
- continue;
143
- }
144
- break;
145
- default:
146
- break;
147
- }
148
-
149
/* Simplify using known-zero bits. Currently only ops with a single
150
output argument is supported. */
151
z_mask = -1;
152
--
47
--
153
2.25.1
48
2.43.0
154
49
155
50
diff view generated by jsdifflib
1
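For reference, the per-lane identity that the generic expander provides when
bitsel_vec is missing (a sketch of the semantics, not the expander itself):

    /* bitsel(d, c, t, f): d = (c & t) | (~c & f), per lane. */
    static inline uint64_t bitsel64(uint64_t c, uint64_t t, uint64_t f)
    {
        return (c & t) | (~c & f);
    }
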
Do not reference TCG_TARGET_HAS_* directly.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-a64.c | 10 ++++++----
 target/arm/tcg/translate-sve.c |  2 +-
 target/arm/tcg/translate.c     |  2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
     TCGv_i64 tcg_rn, tcg_y;
     DisasCompare c;
     unsigned nzcv;
+    bool has_andc;
 
     /* Set T0 = !COND.  */
     arm_test_cc(&c, a->cond);
@@ -XXX,XX +XXX,XX @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
 
     nzcv = a->nzcv;
+    has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0);
     if (nzcv & 8) { /* N */
         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
     } else {
-        if (TCG_TARGET_HAS_andc_i32) {
+        if (has_andc) {
             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
         } else {
             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
         }
     }
     if (nzcv & 4) { /* Z */
-        if (TCG_TARGET_HAS_andc_i32) {
+        if (has_andc) {
             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
         } else {
             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
@@ -XXX,XX +XXX,XX @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
     if (nzcv & 2) { /* C */
         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
     } else {
-        if (TCG_TARGET_HAS_andc_i32) {
+        if (has_andc) {
             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
         } else {
             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
@@ -XXX,XX +XXX,XX @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
     if (nzcv & 1) { /* V */
         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
     } else {
-        if (TCG_TARGET_HAS_andc_i32) {
+        if (has_andc) {
             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
         } else {
             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -XXX,XX +XXX,XX @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
      *           =         | ~(m | k)
      */
     tcg_gen_and_i64(n, n, k);
-    if (TCG_TARGET_HAS_orc_i64) {
+    if (tcg_op_supported(INDEX_op_orc_i64, TCG_TYPE_I64, 0)) {
         tcg_gen_or_i64(m, m, k);
         tcg_gen_orc_i64(d, n, m);
     } else {
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
-    if (TCG_TARGET_HAS_add2_i32) {
+    if (tcg_op_supported(INDEX_op_add2_i32, TCG_TYPE_I32, 0)) {
         tcg_gen_movi_i32(tmp, 0);
         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
-- 
2.43.0

Do not reference TCG_TARGET_HAS_* directly.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/tricore/translate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -XXX,XX +XXX,XX @@ static void decode_bit_andacc(DisasContext *ctx)
                     pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_and_tl);
         break;
     case OPC2_32_BIT_AND_NOR_T:
-        if (TCG_TARGET_HAS_andc_i32) {
+        if (tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0)) {
             gen_bit_2op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
                         pos1, pos2, &tcg_gen_or_tl, &tcg_gen_andc_tl);
         } else {
@@ -XXX,XX +XXX,XX @@ static void decode_bit_orand(DisasContext *ctx)
                     pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_or_tl);
         break;
     case OPC2_32_BIT_OR_NOR_T:
-        if (TCG_TARGET_HAS_orc_i32) {
+        if (tcg_op_supported(INDEX_op_orc_i32, TCG_TYPE_I32, 0)) {
             gen_bit_2op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
                         pos1, pos2, &tcg_gen_or_tl, &tcg_gen_orc_tl);
         } else {
-- 
2.43.0

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |  6 ++++++
 tcg/tcg.c         | 21 +++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef struct TCGTargetOpDef {
  * on which we are currently executing.
  */
 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags);
+/*
+ * tcg_op_deposit_valid:
+ * Query if a deposit into (ofs, len) is supported for @type by
+ * the host on which we are currently executing.
+ */
+bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len);
 
 void tcg_gen_call0(void *func, TCGHelperInfo *, TCGTemp *ret);
 void tcg_gen_call1(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
     }
 }
 
+bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
+{
+    tcg_debug_assert(len > 0);
+    switch (type) {
+    case TCG_TYPE_I32:
+        tcg_debug_assert(ofs < 32);
+        tcg_debug_assert(len <= 32);
+        tcg_debug_assert(ofs + len <= 32);
+        return TCG_TARGET_HAS_deposit_i32 &&
+               TCG_TARGET_deposit_i32_valid(ofs, len);
+    case TCG_TYPE_I64:
+        tcg_debug_assert(ofs < 64);
+        tcg_debug_assert(len <= 64);
+        tcg_debug_assert(ofs + len <= 64);
+        return TCG_TARGET_HAS_deposit_i64 &&
+               TCG_TARGET_deposit_i64_valid(ofs, len);
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
 
 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
-- 
2.43.0

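A sketch of the intended call pattern (assumed usage, mirroring the i386
conversion later in this series):

    /* Use a real deposit only when the host supports this width. */
    if (tcg_op_deposit_valid(TCG_TYPE_TL, 1, TARGET_LONG_BITS - 1)) {
        tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1);
    } else {
        tcg_gen_shli_tl(high, s->T0, 1);    /* fall back to shift+or */
        tcg_gen_or_tl(high, high, cin);
    }
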
This macro is unused.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/tcg/emit.c.inc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -XXX,XX +XXX,XX @@
 #ifdef TARGET_X86_64
 #define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
 #define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i64_valid
-#define TCG_TARGET_extract_tl_valid TCG_TARGET_extract_i64_valid
 #else
 #define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
 #define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i32_valid
-#define TCG_TARGET_extract_tl_valid TCG_TARGET_extract_i32_valid
 #endif
 
 #define MMX_OFFSET(reg) \
-- 
2.43.0

Avoid direct usage of TCG_TARGET_deposit_*_valid.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/tcg/emit.c.inc | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -XXX,XX +XXX,XX @@
  */
 #ifdef TARGET_X86_64
 #define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
-#define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i64_valid
 #else
 #define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
-#define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i32_valid
 #endif
 
 #define MMX_OFFSET(reg) \
@@ -XXX,XX +XXX,XX @@ static void gen_RCL(DisasContext *s, X86DecodedInsn *decode)
     }
 
     /* Compute high part, including incoming carry. */
-    if (!have_1bit_cin || TCG_TARGET_deposit_tl_valid(1, TARGET_LONG_BITS - 1)) {
+    if (!have_1bit_cin || tcg_op_deposit_valid(TCG_TYPE_TL, 1, TARGET_LONG_BITS - 1)) {
         /* high = (T0 << 1) | cin */
         TCGv cin = have_1bit_cin ? decode->cc_dst : decode->cc_src;
         tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1);
@@ -XXX,XX +XXX,XX @@ static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
     }
 
     /* Save incoming carry into high, it will be shifted later. */
-    if (!have_1bit_cin || TCG_TARGET_deposit_tl_valid(1, TARGET_LONG_BITS - 1)) {
+    if (!have_1bit_cin || tcg_op_deposit_valid(TCG_TYPE_TL, 1, TARGET_LONG_BITS - 1)) {
         TCGv cin = have_1bit_cin ? decode->cc_dst : decode->cc_src;
         tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1);
     } else {
-- 
2.43.0

This "garbage" setting pre-dates the addition of the type
1
Do not reference TCG_TARGET_HAS_* directly.
2
changing opcodes INDEX_op_ext_i32_i64, INDEX_op_extu_i32_i64,
3
and INDEX_op_extr{l,h}_i64_i32.
4
2
5
So now we have a definitive points at which to adjust z_mask
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
to eliminate such bits from the 32-bit operands.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
5
---
12
tcg/optimize.c | 35 ++++++++++++++++-------------------
6
target/i386/tcg/emit.c.inc | 6 +++---
13
1 file changed, 16 insertions(+), 19 deletions(-)
7
1 file changed, 3 insertions(+), 3 deletions(-)
14
8
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
16
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
11
--- a/target/i386/tcg/emit.c.inc
18
+++ b/tcg/optimize.c
12
+++ b/target/i386/tcg/emit.c.inc
19
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
13
@@ -XXX,XX +XXX,XX @@
20
ti->is_const = true;
14
* The exact opcode to check depends on 32- vs. 64-bit.
21
ti->val = ts->val;
15
*/
22
ti->z_mask = ts->val;
16
#ifdef TARGET_X86_64
23
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
17
-#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
24
- /* High bits of a 32-bit quantity are garbage. */
18
+#define INDEX_op_extract2_tl INDEX_op_extract2_i64
25
- ti->z_mask |= ~0xffffffffull;
19
#else
26
- }
20
-#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
27
} else {
21
+#define INDEX_op_extract2_tl INDEX_op_extract2_i32
28
ti->is_const = false;
22
#endif
29
ti->z_mask = -1;
23
30
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
24
#define MMX_OFFSET(reg) \
31
TCGTemp *src_ts = arg_temp(src);
25
@@ -XXX,XX +XXX,XX @@ static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode)
32
TempOptInfo *di;
26
tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1)));
33
TempOptInfo *si;
27
while (vec_len > 8) {
34
- uint64_t z_mask;
28
vec_len -= 8;
35
TCGOpcode new_op;
29
- if (TCG_TARGET_HAS_extract2_tl) {
36
30
+ if (tcg_op_supported(INDEX_op_extract2_tl, TCG_TYPE_TL, 0)) {
37
if (ts_are_copies(dst_ts, src_ts)) {
31
/*
38
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
32
* Load the next byte of the result into the high byte of T.
39
op->args[0] = dst;
33
* TCG does a similar expansion of deposit to shl+extract2; by
40
op->args[1] = src;
41
42
- z_mask = si->z_mask;
43
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
44
- /* High bits of the destination are now garbage. */
45
- z_mask |= ~0xffffffffull;
46
- }
47
- di->z_mask = z_mask;
48
+ di->z_mask = si->z_mask;
49
50
if (src_ts->type == dst_ts->type) {
51
TempOptInfo *ni = ts_info(si->next_copy);
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
53
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
54
TCGArg dst, uint64_t val)
55
{
56
- /* Convert movi to mov with constant temp. */
57
- TCGTemp *tv = tcg_constant_internal(ctx->type, val);
58
+ TCGTemp *tv;
59
60
+ if (ctx->type == TCG_TYPE_I32) {
61
+ val = (int32_t)val;
62
+ }
63
+
64
+ /* Convert movi to mov with constant temp. */
65
+ tv = tcg_constant_internal(ctx->type, val);
66
init_ts_info(ctx, tv);
67
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
68
}
69
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
70
uint64_t z_mask = ctx->z_mask;
71
72
/*
73
- * 32-bit ops generate 32-bit results. For the result is zero test
74
- * below, we can ignore high bits, but for further optimizations we
75
- * need to record that the high bits contain garbage.
76
+ * 32-bit ops generate 32-bit results, which for the purpose of
77
+ * simplifying tcg are sign-extended. Certainly that's how we
78
+ * represent our constants elsewhere. Note that the bits will
79
+ * be reset properly for a 64-bit value when encountering the
80
+ * type changing opcodes.
81
*/
82
if (ctx->type == TCG_TYPE_I32) {
83
- ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
84
- a_mask &= MAKE_64BIT_MASK(0, 32);
85
- z_mask &= MAKE_64BIT_MASK(0, 32);
86
+ a_mask = (int32_t)a_mask;
87
+ z_mask = (int32_t)z_mask;
88
+ ctx->z_mask = z_mask;
89
}
90
91
if (z_mask == 0) {
92
--
34
--
93
2.25.1
35
2.43.0
94
36
95
37
diff view generated by jsdifflib
1
Make these features unconditional, as they're used by most
tcg backends anyway.  Merge tcg-ldst.c.inc and tcg-pool.c.inc
into tcg.c and mark some of the functions unused, so that
when the features are not used we won't get Werrors.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h                |   4 -
 tcg/aarch64/tcg-target.h         |   2 -
 tcg/arm/tcg-target.h             |   2 -
 tcg/i386/tcg-target.h            |   2 -
 tcg/loongarch64/tcg-target.h     |   2 -
 tcg/mips/tcg-target.h            |   2 -
 tcg/ppc/tcg-target.h             |   2 -
 tcg/riscv/tcg-target.h           |   3 -
 tcg/s390x/tcg-target.h           |   2 -
 tcg/sparc64/tcg-target.h         |   2 -
 tcg/tcg.c                        | 211 +++++++++++++++++++++++++++++--
 tcg/aarch64/tcg-target.c.inc     |   2 -
 tcg/arm/tcg-target.c.inc         |   2 -
 tcg/i386/tcg-target.c.inc        |   3 -
 tcg/loongarch64/tcg-target.c.inc |   9 +-
 tcg/mips/tcg-target.c.inc        |   3 -
 tcg/ppc/tcg-target.c.inc         |   2 -
 tcg/riscv/tcg-target.c.inc       |   3 -
 tcg/s390x/tcg-target.c.inc       |   2 -
 tcg/sparc64/tcg-target.c.inc     |   3 -
 tcg/tcg-ldst.c.inc               |  65 ----------
 tcg/tcg-pool.c.inc               | 162 ------------------------
 tcg/tci/tcg-target.c.inc         |  12 +-
 23 files changed, 216 insertions(+), 286 deletions(-)
 delete mode 100644 tcg/tcg-ldst.c.inc
 delete mode 100644 tcg/tcg-pool.c.inc

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
     CPUState *cpu;                      /* *_trans */
 
     /* These structures are private to tcg-target.c.inc. */
-#ifdef TCG_TARGET_NEED_LDST_LABELS
     QSIMPLEQ_HEAD(, TCGLabelQemuLdst) ldst_labels;
-#endif
-#ifdef TCG_TARGET_NEED_POOL_LABELS
     struct TCGLabelPoolData *pool_labels;
-#endif
 
     TCGLabel *exitreq_label;
 
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define TCG_TARGET_HAS_tst_vec 1
 
 #define TCG_TARGET_DEFAULT_MO (0)
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
 #define TCG_TARGET_HAS_tst_vec 1
 
 #define TCG_TARGET_DEFAULT_MO (0)
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #include "tcg/tcg-mo.h"
 
 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 
 #define TCG_TARGET_DEFAULT_MO (0)
 
-#define TCG_TARGET_NEED_LDST_LABELS
-
 #endif /* LOONGARCH_TCG_TARGET_H */
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_tst 0
 
 #define TCG_TARGET_DEFAULT_MO 0
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define TCG_TARGET_HAS_tst_vec 0
 
 #define TCG_TARGET_DEFAULT_MO (0)
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 
 #define TCG_TARGET_DEFAULT_MO (0)
 
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
-
 #endif
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
 #define TCG_TARGET_HAS_tst_vec 0
 
 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
 #define TCG_AREG0 TCG_REG_I0
155
#define TCG_TARGET_DEFAULT_MO (0)
156
-#define TCG_TARGET_NEED_LDST_LABELS
157
-#define TCG_TARGET_NEED_POOL_LABELS
158
159
#endif
160
diff --git a/tcg/tcg.c b/tcg/tcg.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/tcg/tcg.c
163
+++ b/tcg/tcg.c
164
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s);
165
static void tcg_target_qemu_prologue(TCGContext *s);
166
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
167
intptr_t value, intptr_t addend);
168
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
169
+
170
+typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
171
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
172
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
173
174
/* The CIE and FDE header definitions will be common to all hosts. */
175
typedef struct {
176
@@ -XXX,XX +XXX,XX @@ typedef struct QEMU_PACKED {
177
DebugFrameFDEHeader fde;
178
} DebugFrameHeader;
179
180
-typedef struct TCGLabelQemuLdst {
181
+struct TCGLabelQemuLdst {
182
bool is_ld; /* qemu_ld: true, qemu_st: false */
183
MemOpIdx oi;
184
TCGType type; /* result type of a load */
185
@@ -XXX,XX +XXX,XX @@ typedef struct TCGLabelQemuLdst {
186
const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */
187
tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
188
QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
189
-} TCGLabelQemuLdst;
190
+};
191
192
static void tcg_register_jit_int(const void *buf, size_t size,
193
const void *debug_frame,
194
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
195
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
196
static bool tcg_target_const_match(int64_t val, int ct,
197
TCGType type, TCGCond cond, int vece);
198
-#ifdef TCG_TARGET_NEED_LDST_LABELS
199
-static int tcg_out_ldst_finalize(TCGContext *s);
200
-#endif
201
202
#ifndef CONFIG_USER_ONLY
203
#define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; })
204
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
205
}
206
}
207
208
+/*
209
+ * Allocate a new TCGLabelQemuLdst entry.
210
+ */
211
+
212
+__attribute__((unused))
213
+static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
214
+{
215
+ TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
216
+
217
+ memset(l, 0, sizeof(*l));
218
+ QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
219
+
220
+ return l;
221
+}
222
+
223
+/*
224
+ * Allocate new constant pool entries.
225
+ */
226
+
227
+typedef struct TCGLabelPoolData {
228
+ struct TCGLabelPoolData *next;
229
+ tcg_insn_unit *label;
230
+ intptr_t addend;
231
+ int rtype;
232
+ unsigned nlong;
233
+ tcg_target_ulong data[];
234
+} TCGLabelPoolData;
235
+
236
+static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
237
+ tcg_insn_unit *label, intptr_t addend)
238
+{
239
+ TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
240
+ + sizeof(tcg_target_ulong) * nlong);
241
+
242
+ n->label = label;
243
+ n->addend = addend;
244
+ n->rtype = rtype;
245
+ n->nlong = nlong;
246
+ return n;
247
+}
248
+
249
+static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
250
+{
251
+ TCGLabelPoolData *i, **pp;
252
+ int nlong = n->nlong;
253
+
254
+ /* Insertion sort on the pool. */
255
+ for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
256
+ if (nlong > i->nlong) {
257
+ break;
258
+ }
259
+ if (nlong < i->nlong) {
260
+ continue;
261
+ }
262
+ if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
263
+ break;
264
+ }
265
+ }
266
+ n->next = *pp;
267
+ *pp = n;
268
+}
269
+
270
+/* The "usual" for generic integer code. */
271
+__attribute__((unused))
272
+static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
273
+ tcg_insn_unit *label, intptr_t addend)
274
+{
275
+ TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
276
+ n->data[0] = d;
277
+ new_pool_insert(s, n);
278
+}
279
+
280
+/* For v64 or v128, depending on the host. */
281
+__attribute__((unused))
282
+static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
283
+ intptr_t addend, tcg_target_ulong d0,
284
+ tcg_target_ulong d1)
285
+{
286
+ TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
287
+ n->data[0] = d0;
288
+ n->data[1] = d1;
289
+ new_pool_insert(s, n);
290
+}
291
+
292
+/* For v128 or v256, depending on the host. */
293
+__attribute__((unused))
294
+static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
295
+ intptr_t addend, tcg_target_ulong d0,
296
+ tcg_target_ulong d1, tcg_target_ulong d2,
297
+ tcg_target_ulong d3)
298
+{
299
+ TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
300
+ n->data[0] = d0;
301
+ n->data[1] = d1;
302
+ n->data[2] = d2;
303
+ n->data[3] = d3;
304
+ new_pool_insert(s, n);
305
+}
306
+
307
+/* For v256, for 32-bit host. */
308
+__attribute__((unused))
309
+static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
310
+ intptr_t addend, tcg_target_ulong d0,
311
+ tcg_target_ulong d1, tcg_target_ulong d2,
312
+ tcg_target_ulong d3, tcg_target_ulong d4,
313
+ tcg_target_ulong d5, tcg_target_ulong d6,
314
+ tcg_target_ulong d7)
315
+{
316
+ TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
317
+ n->data[0] = d0;
318
+ n->data[1] = d1;
319
+ n->data[2] = d2;
320
+ n->data[3] = d3;
321
+ n->data[4] = d4;
322
+ n->data[5] = d5;
323
+ n->data[6] = d6;
324
+ n->data[7] = d7;
325
+ new_pool_insert(s, n);
326
+}
327
+
328
+/*
329
+ * Generate TB finalization at the end of block
330
+ */
331
+
332
+static int tcg_out_ldst_finalize(TCGContext *s)
333
+{
334
+ TCGLabelQemuLdst *lb;
335
+
336
+ /* qemu_ld/st slow paths */
337
+ QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
338
+ if (lb->is_ld
339
+ ? !tcg_out_qemu_ld_slow_path(s, lb)
340
+ : !tcg_out_qemu_st_slow_path(s, lb)) {
341
+ return -2;
342
+ }
343
+
344
+ /*
345
+ * Test for (pending) buffer overflow. The assumption is that any
346
+ * one operation beginning below the high water mark cannot overrun
347
+ * the buffer completely. Thus we can test for overflow after
348
+ * generating code without having to check during generation.
349
+ */
350
+ if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
351
+ return -1;
352
+ }
353
+ }
354
+ return 0;
355
+}
356
+
357
+static int tcg_out_pool_finalize(TCGContext *s)
358
+{
359
+ TCGLabelPoolData *p = s->pool_labels;
360
+ TCGLabelPoolData *l = NULL;
361
+ void *a;
362
+
363
+ if (p == NULL) {
364
+ return 0;
365
+ }
366
+
367
+ /*
368
+ * ??? Round up to qemu_icache_linesize, but then do not round
369
+ * again when allocating the next TranslationBlock structure.
370
+ */
371
+ a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
372
+ sizeof(tcg_target_ulong) * p->nlong);
373
+ tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
374
+ s->data_gen_ptr = a;
375
+
376
+ for (; p != NULL; p = p->next) {
377
+ size_t size = sizeof(tcg_target_ulong) * p->nlong;
378
+ uintptr_t value;
379
+
380
+ if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
381
+ if (unlikely(a > s->code_gen_highwater)) {
382
+ return -1;
383
+ }
384
+ memcpy(a, p->data, size);
385
+ a += size;
386
+ l = p;
387
+ }
388
+
389
+ value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
390
+ if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
391
+ return -2;
392
+ }
393
+ }
394
+
395
+ s->code_ptr = a;
396
+ return 0;
397
+}
398
+
399
#define C_PFX1(P, A) P##A
400
#define C_PFX2(P, A, B) P##A##_##B
401
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
402
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
403
s->code_ptr = s->code_buf;
404
s->data_gen_ptr = NULL;
405
406
-#ifdef TCG_TARGET_NEED_LDST_LABELS
407
QSIMPLEQ_INIT(&s->ldst_labels);
408
-#endif
409
-#ifdef TCG_TARGET_NEED_POOL_LABELS
410
s->pool_labels = NULL;
411
-#endif
412
413
start_words = s->insn_start_words;
414
s->gen_insn_data =
415
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
416
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
417
418
/* Generate TB finalization at the end of block */
419
-#ifdef TCG_TARGET_NEED_LDST_LABELS
420
i = tcg_out_ldst_finalize(s);
421
if (i < 0) {
422
return i;
423
}
424
-#endif
425
-#ifdef TCG_TARGET_NEED_POOL_LABELS
426
i = tcg_out_pool_finalize(s);
427
if (i < 0) {
428
return i;
429
}
430
-#endif
431
if (!tcg_resolve_relocs(s)) {
432
return -2;
433
}
434
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
435
index XXXXXXX..XXXXXXX 100644
436
--- a/tcg/aarch64/tcg-target.c.inc
437
+++ b/tcg/aarch64/tcg-target.c.inc
438
@@ -XXX,XX +XXX,XX @@
439
* See the COPYING file in the top-level directory for details.
29
*/
440
*/
30
441
31
-/* Portions of this work are licensed under the terms of the GNU GPL,
442
-#include "../tcg-ldst.c.inc"
32
- * version 2 or later. See the COPYING file in the top-level directory.
443
-#include "../tcg-pool.c.inc"
444
#include "qemu/bitops.h"
445
446
/* Used for function call generation. */
447
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
448
index XXXXXXX..XXXXXXX 100644
449
--- a/tcg/arm/tcg-target.c.inc
450
+++ b/tcg/arm/tcg-target.c.inc
451
@@ -XXX,XX +XXX,XX @@
452
*/
453
454
#include "elf.h"
455
-#include "../tcg-ldst.c.inc"
456
-#include "../tcg-pool.c.inc"
457
458
int arm_arch = __ARM_ARCH;
459
460
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
461
index XXXXXXX..XXXXXXX 100644
462
--- a/tcg/i386/tcg-target.c.inc
463
+++ b/tcg/i386/tcg-target.c.inc
464
@@ -XXX,XX +XXX,XX @@
465
* THE SOFTWARE.
466
*/
467
468
-#include "../tcg-ldst.c.inc"
469
-#include "../tcg-pool.c.inc"
470
-
471
/* Used for function call generation. */
472
#define TCG_TARGET_STACK_ALIGN 16
473
#if defined(_WIN64)
474
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
475
index XXXXXXX..XXXXXXX 100644
476
--- a/tcg/loongarch64/tcg-target.c.inc
477
+++ b/tcg/loongarch64/tcg-target.c.inc
478
@@ -XXX,XX +XXX,XX @@
479
* THE SOFTWARE.
480
*/
481
482
-#include "../tcg-ldst.c.inc"
483
#include <asm/hwcap.h>
484
485
/* used for function call generation */
486
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tb_start(TCGContext *s)
487
/* nothing to do */
488
}
489
490
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
491
+{
492
+ for (int i = 0; i < count; ++i) {
493
+ /* Canonical nop is andi r0,r0,0 */
494
+ p[i] = OPC_ANDI;
495
+ }
496
+}
497
+
498
static void tcg_target_init(TCGContext *s)
499
{
500
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
501
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
502
index XXXXXXX..XXXXXXX 100644
503
--- a/tcg/mips/tcg-target.c.inc
504
+++ b/tcg/mips/tcg-target.c.inc
505
@@ -XXX,XX +XXX,XX @@
506
* THE SOFTWARE.
507
*/
508
509
-#include "../tcg-ldst.c.inc"
510
-#include "../tcg-pool.c.inc"
511
-
512
/* used for function call generation */
513
#define TCG_TARGET_STACK_ALIGN 16
514
#if _MIPS_SIM == _ABIO32
515
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
516
index XXXXXXX..XXXXXXX 100644
517
--- a/tcg/ppc/tcg-target.c.inc
518
+++ b/tcg/ppc/tcg-target.c.inc
519
@@ -XXX,XX +XXX,XX @@
520
*/
521
522
#include "elf.h"
523
-#include "../tcg-pool.c.inc"
524
-#include "../tcg-ldst.c.inc"
525
526
/*
527
* Standardize on the _CALL_FOO symbols used by GCC:
528
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
529
index XXXXXXX..XXXXXXX 100644
530
--- a/tcg/riscv/tcg-target.c.inc
531
+++ b/tcg/riscv/tcg-target.c.inc
532
@@ -XXX,XX +XXX,XX @@
533
* THE SOFTWARE.
534
*/
535
536
-#include "../tcg-ldst.c.inc"
537
-#include "../tcg-pool.c.inc"
538
-
539
/* Used for function call generation. */
540
#define TCG_REG_CALL_STACK TCG_REG_SP
541
#define TCG_TARGET_STACK_ALIGN 16
542
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
543
index XXXXXXX..XXXXXXX 100644
544
--- a/tcg/s390x/tcg-target.c.inc
545
+++ b/tcg/s390x/tcg-target.c.inc
546
@@ -XXX,XX +XXX,XX @@
547
* THE SOFTWARE.
548
*/
549
550
-#include "../tcg-ldst.c.inc"
551
-#include "../tcg-pool.c.inc"
552
#include "elf.h"
553
554
/* Used for function call generation. */
555
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
556
index XXXXXXX..XXXXXXX 100644
557
--- a/tcg/sparc64/tcg-target.c.inc
558
+++ b/tcg/sparc64/tcg-target.c.inc
559
@@ -XXX,XX +XXX,XX @@
560
#error "unsupported code generation mode"
561
#endif
562
563
-#include "../tcg-ldst.c.inc"
564
-#include "../tcg-pool.c.inc"
565
-
566
/* Used for function call generation. */
567
#define TCG_REG_CALL_STACK TCG_REG_O6
568
#define TCG_TARGET_STACK_BIAS 2047
569
diff --git a/tcg/tcg-ldst.c.inc b/tcg/tcg-ldst.c.inc
570
deleted file mode 100644
571
index XXXXXXX..XXXXXXX
572
--- a/tcg/tcg-ldst.c.inc
573
+++ /dev/null
574
@@ -XXX,XX +XXX,XX @@
575
-/*
576
- * TCG Backend Data: load-store optimization only.
577
- *
578
- * Permission is hereby granted, free of charge, to any person obtaining a copy
579
- * of this software and associated documentation files (the "Software"), to deal
580
- * in the Software without restriction, including without limitation the rights
581
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
582
- * copies of the Software, and to permit persons to whom the Software is
583
- * furnished to do so, subject to the following conditions:
584
- *
585
- * The above copyright notice and this permission notice shall be included in
586
- * all copies or substantial portions of the Software.
587
- *
588
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
589
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
590
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
591
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
592
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
593
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
594
- * THE SOFTWARE.
33
- */
595
- */
34
-
596
-
35
#ifndef FPU_SOFTFLOAT_MACROS_H
597
-/*
36
#define FPU_SOFTFLOAT_MACROS_H
598
- * Generate TB finalization at the end of block
37
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
39
40
}
41
42
-/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
43
- * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
44
- *
45
- * Licensed under the GPLv2/LGPLv3
46
- */
599
- */
47
-static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
600
-
48
- uint64_t n0, uint64_t d)
601
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
602
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
603
-
604
-static int tcg_out_ldst_finalize(TCGContext *s)
49
-{
605
-{
50
-#if defined(__x86_64__)
606
- TCGLabelQemuLdst *lb;
51
- uint64_t q;
607
-
52
- asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
608
- /* qemu_ld/st slow paths */
53
- return q;
609
- QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
54
-#elif defined(__s390x__) && !defined(__clang__)
610
- if (lb->is_ld
55
- /* Need to use a TImode type to get an even register pair for DLGR. */
611
- ? !tcg_out_qemu_ld_slow_path(s, lb)
56
- unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
612
- : !tcg_out_qemu_st_slow_path(s, lb)) {
57
- asm("dlgr %0, %1" : "+r"(n) : "r"(d));
613
- return -2;
58
- *r = n >> 64;
614
- }
59
- return n;
615
-
60
-#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
616
- /* Test for (pending) buffer overflow. The assumption is that any
61
- /* From Power ISA 2.06, programming note for divdeu. */
617
- one operation beginning below the high water mark cannot overrun
62
- uint64_t q1, q2, Q, r1, r2, R;
618
- the buffer completely. Thus we can test for overflow after
63
- asm("divdeu %0,%2,%4; divdu %1,%3,%4"
619
- generating code without having to check during generation. */
64
- : "=&r"(q1), "=r"(q2)
620
- if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
65
- : "r"(n1), "r"(n0), "r"(d));
621
- return -1;
66
- r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
67
- r2 = n0 - (q2 * d);
68
- Q = q1 + q2;
69
- R = r1 + r2;
70
- if (R >= d || R < r2) { /* overflow implies R > d */
71
- Q += 1;
72
- R -= d;
73
- }
74
- *r = R;
75
- return Q;
76
-#else
77
- uint64_t d0, d1, q0, q1, r1, r0, m;
78
-
79
- d0 = (uint32_t)d;
80
- d1 = d >> 32;
81
-
82
- r1 = n1 % d1;
83
- q1 = n1 / d1;
84
- m = q1 * d0;
85
- r1 = (r1 << 32) | (n0 >> 32);
86
- if (r1 < m) {
87
- q1 -= 1;
88
- r1 += d;
89
- if (r1 >= d) {
90
- if (r1 < m) {
91
- q1 -= 1;
92
- r1 += d;
93
- }
94
- }
622
- }
95
- }
623
- }
96
- r1 -= m;
624
- return 0;
97
-
625
-}
98
- r0 = r1 % d1;
626
-
99
- q0 = r1 / d1;
627
-/*
100
- m = q0 * d0;
628
- * Allocate a new TCGLabelQemuLdst entry.
101
- r0 = (r0 << 32) | (uint32_t)n0;
629
- */
102
- if (r0 < m) {
630
-
103
- q0 -= 1;
631
-static inline TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
104
- r0 += d;
632
-{
105
- if (r0 >= d) {
633
- TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
106
- if (r0 < m) {
634
-
107
- q0 -= 1;
635
- memset(l, 0, sizeof(*l));
108
- r0 += d;
636
- QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
109
- }
637
-
638
- return l;
639
-}
640
diff --git a/tcg/tcg-pool.c.inc b/tcg/tcg-pool.c.inc
641
deleted file mode 100644
642
index XXXXXXX..XXXXXXX
643
--- a/tcg/tcg-pool.c.inc
644
+++ /dev/null
645
@@ -XXX,XX +XXX,XX @@
646
-/*
647
- * TCG Backend Data: constant pool.
648
- *
649
- * Permission is hereby granted, free of charge, to any person obtaining a copy
650
- * of this software and associated documentation files (the "Software"), to deal
651
- * in the Software without restriction, including without limitation the rights
652
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
653
- * copies of the Software, and to permit persons to whom the Software is
654
- * furnished to do so, subject to the following conditions:
655
- *
656
- * The above copyright notice and this permission notice shall be included in
657
- * all copies or substantial portions of the Software.
658
- *
659
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
660
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
661
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
662
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
663
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
664
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
665
- * THE SOFTWARE.
666
- */
667
-
668
-typedef struct TCGLabelPoolData {
669
- struct TCGLabelPoolData *next;
670
- tcg_insn_unit *label;
671
- intptr_t addend;
672
- int rtype;
673
- unsigned nlong;
674
- tcg_target_ulong data[];
675
-} TCGLabelPoolData;
676
-
677
-
678
-static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
679
- tcg_insn_unit *label, intptr_t addend)
680
-{
681
- TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
682
- + sizeof(tcg_target_ulong) * nlong);
683
-
684
- n->label = label;
685
- n->addend = addend;
686
- n->rtype = rtype;
687
- n->nlong = nlong;
688
- return n;
689
-}
690
-
691
-static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
692
-{
693
- TCGLabelPoolData *i, **pp;
694
- int nlong = n->nlong;
695
-
696
- /* Insertion sort on the pool. */
697
- for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
698
- if (nlong > i->nlong) {
699
- break;
700
- }
701
- if (nlong < i->nlong) {
702
- continue;
703
- }
704
- if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
705
- break;
110
- }
706
- }
111
- }
707
- }
112
- r0 -= m;
708
- n->next = *pp;
113
-
709
- *pp = n;
114
- *r = r0;
115
- return (q1 << 32) | q0;
116
-#endif
117
-}
710
-}
118
-
711
-
119
/*----------------------------------------------------------------------------
712
-/* The "usual" for generic integer code. */
120
| Returns an approximation to the square root of the 32-bit significand given
713
-static inline void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
121
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
714
- tcg_insn_unit *label, intptr_t addend)
122
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
715
-{
123
index XXXXXXX..XXXXXXX 100644
716
- TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
124
--- a/include/qemu/host-utils.h
717
- n->data[0] = d;
125
+++ b/include/qemu/host-utils.h
718
- new_pool_insert(s, n);
719
-}
720
-
721
-/* For v64 or v128, depending on the host. */
722
-static inline void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
723
- intptr_t addend, tcg_target_ulong d0,
724
- tcg_target_ulong d1)
725
-{
726
- TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
727
- n->data[0] = d0;
728
- n->data[1] = d1;
729
- new_pool_insert(s, n);
730
-}
731
-
732
-/* For v128 or v256, depending on the host. */
733
-static inline void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
734
- intptr_t addend, tcg_target_ulong d0,
735
- tcg_target_ulong d1, tcg_target_ulong d2,
736
- tcg_target_ulong d3)
737
-{
738
- TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
739
- n->data[0] = d0;
740
- n->data[1] = d1;
741
- n->data[2] = d2;
742
- n->data[3] = d3;
743
- new_pool_insert(s, n);
744
-}
745
-
746
-/* For v256, for 32-bit host. */
747
-static inline void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
748
- intptr_t addend, tcg_target_ulong d0,
749
- tcg_target_ulong d1, tcg_target_ulong d2,
750
- tcg_target_ulong d3, tcg_target_ulong d4,
751
- tcg_target_ulong d5, tcg_target_ulong d6,
752
- tcg_target_ulong d7)
753
-{
754
- TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
755
- n->data[0] = d0;
756
- n->data[1] = d1;
757
- n->data[2] = d2;
758
- n->data[3] = d3;
759
- n->data[4] = d4;
760
- n->data[5] = d5;
761
- n->data[6] = d6;
762
- n->data[7] = d7;
763
- new_pool_insert(s, n);
764
-}
765
-
766
-/* To be provided by cpu/tcg-target.c.inc. */
767
-static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
768
-
769
-static int tcg_out_pool_finalize(TCGContext *s)
770
-{
771
- TCGLabelPoolData *p = s->pool_labels;
772
- TCGLabelPoolData *l = NULL;
773
- void *a;
774
-
775
- if (p == NULL) {
776
- return 0;
777
- }
778
-
779
- /* ??? Round up to qemu_icache_linesize, but then do not round
780
- again when allocating the next TranslationBlock structure. */
781
- a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
782
- sizeof(tcg_target_ulong) * p->nlong);
783
- tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
784
- s->data_gen_ptr = a;
785
-
786
- for (; p != NULL; p = p->next) {
787
- size_t size = sizeof(tcg_target_ulong) * p->nlong;
788
- uintptr_t value;
789
-
790
- if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
791
- if (unlikely(a > s->code_gen_highwater)) {
792
- return -1;
793
- }
794
- memcpy(a, p->data, size);
795
- a += size;
796
- l = p;
797
- }
798
-
799
- value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
800
- if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
801
- return -2;
802
- }
803
- }
804
-
805
- s->code_ptr = a;
806
- return 0;
807
-}
808
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
809
index XXXXXXX..XXXXXXX 100644
810
--- a/tcg/tci/tcg-target.c.inc
811
+++ b/tcg/tci/tcg-target.c.inc
126
@@ -XXX,XX +XXX,XX @@
812
@@ -XXX,XX +XXX,XX @@
127
* THE SOFTWARE.
813
* THE SOFTWARE.
128
*/
814
*/
129
815
130
+/* Portions of this work are licensed under the terms of the GNU GPL,
816
-#include "../tcg-pool.c.inc"
131
+ * version 2 or later. See the COPYING file in the top-level directory.
817
-
132
+ */
818
/* Used for function call generation. */
133
+
819
#define TCG_TARGET_CALL_STACK_OFFSET 0
134
#ifndef HOST_UTILS_H
820
#define TCG_TARGET_STACK_ALIGN 8
135
#define HOST_UTILS_H
821
@@ -XXX,XX +XXX,XX @@ bool tcg_target_has_memory_bswap(MemOp memop)
136
822
{
137
@@ -XXX,XX +XXX,XX @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
823
return true;
138
*/
824
}
139
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
825
+
140
826
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
141
+/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
827
+{
142
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
828
+ g_assert_not_reached();
143
+ *
829
+}
144
+ * Licensed under the GPLv2/LGPLv3
830
+
145
+ */
831
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
146
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
832
+{
147
+ uint64_t n0, uint64_t d)
833
+ g_assert_not_reached();
148
+{
834
+}
149
+#if defined(__x86_64__)
150
+ uint64_t q;
151
+ asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
152
+ return q;
153
+#elif defined(__s390x__) && !defined(__clang__)
154
+ /* Need to use a TImode type to get an even register pair for DLGR. */
155
+ unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
156
+ asm("dlgr %0, %1" : "+r"(n) : "r"(d));
157
+ *r = n >> 64;
158
+ return n;
159
+#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
160
+ /* From Power ISA 2.06, programming note for divdeu. */
161
+ uint64_t q1, q2, Q, r1, r2, R;
162
+ asm("divdeu %0,%2,%4; divdu %1,%3,%4"
163
+ : "=&r"(q1), "=r"(q2)
164
+ : "r"(n1), "r"(n0), "r"(d));
165
+ r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
166
+ r2 = n0 - (q2 * d);
167
+ Q = q1 + q2;
168
+ R = r1 + r2;
169
+ if (R >= d || R < r2) { /* overflow implies R > d */
170
+ Q += 1;
171
+ R -= d;
172
+ }
173
+ *r = R;
174
+ return Q;
175
+#else
176
+ uint64_t d0, d1, q0, q1, r1, r0, m;
177
+
178
+ d0 = (uint32_t)d;
179
+ d1 = d >> 32;
180
+
181
+ r1 = n1 % d1;
182
+ q1 = n1 / d1;
183
+ m = q1 * d0;
184
+ r1 = (r1 << 32) | (n0 >> 32);
185
+ if (r1 < m) {
186
+ q1 -= 1;
187
+ r1 += d;
188
+ if (r1 >= d) {
189
+ if (r1 < m) {
190
+ q1 -= 1;
191
+ r1 += d;
192
+ }
193
+ }
194
+ }
195
+ r1 -= m;
196
+
197
+ r0 = r1 % d1;
198
+ q0 = r1 / d1;
199
+ m = q0 * d0;
200
+ r0 = (r0 << 32) | (uint32_t)n0;
201
+ if (r0 < m) {
202
+ q0 -= 1;
203
+ r0 += d;
204
+ if (r0 >= d) {
205
+ if (r0 < m) {
206
+ q0 -= 1;
207
+ r0 += d;
208
+ }
209
+ }
210
+ }
211
+ r0 -= m;
212
+
213
+ *r = r0;
214
+ return (q1 << 32) | q0;
215
+#endif
216
+}
217
+
218
#endif
219
--
835
--
220
2.25.1
836
2.43.0
221
837
222
838
diff view generated by jsdifflib
1
Rename to fold_multiply2, and handle muls2_i32, mulu2_i64,
1
In addition, add empty files for mips, sparc64 and tci.
2
and muls2_i64.
2
Make the include unconditional within tcg-opc.h.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 44 +++++++++++++++++++++++++++++++++++---------
7
include/tcg/tcg-opc.h | 4 +---
9
1 file changed, 35 insertions(+), 9 deletions(-)
8
tcg/aarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
9
tcg/arm/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
10
tcg/i386/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
11
tcg/loongarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
12
tcg/mips/tcg-target-opc.h.inc | 1 +
13
tcg/ppc/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
14
tcg/riscv/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
15
tcg/s390x/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
16
tcg/sparc64/tcg-target-opc.h.inc | 1 +
17
tcg/tci/tcg-target-opc.h.inc | 1 +
18
11 files changed, 4 insertions(+), 3 deletions(-)
19
rename tcg/aarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
20
rename tcg/arm/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
21
rename tcg/i386/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
22
rename tcg/loongarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
23
create mode 100644 tcg/mips/tcg-target-opc.h.inc
24
rename tcg/ppc/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
25
rename tcg/riscv/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
26
rename tcg/s390x/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
27
create mode 100644 tcg/sparc64/tcg-target-opc.h.inc
28
create mode 100644 tcg/tci/tcg-target-opc.h.inc
10
29
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
30
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
12
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
32
--- a/include/tcg/tcg-opc.h
14
+++ b/tcg/optimize.c
33
+++ b/include/tcg/tcg-opc.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
34
@@ -XXX,XX +XXX,XX @@ DEF(cmpsel_vec, 1, 4, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_cmpsel_vec))
16
return false;
35
17
}
36
DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
18
37
19
-static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
38
-#if TCG_TARGET_MAYBE_vec
20
+static bool fold_multiply2(OptContext *ctx, TCGOp *op)
39
-#include "tcg-target.opc.h"
21
{
40
-#endif
22
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
41
+#include "tcg-target-opc.h.inc"
23
- uint32_t a = arg_info(op->args[2])->val;
42
24
- uint32_t b = arg_info(op->args[3])->val;
43
#ifdef TCG_TARGET_INTERPRETER
25
- uint64_t r = (uint64_t)a * b;
44
/* These opcodes are only for use between the tci generator and interpreter. */
26
+ uint64_t a = arg_info(op->args[2])->val;
45
diff --git a/tcg/aarch64/tcg-target.opc.h b/tcg/aarch64/tcg-target-opc.h.inc
27
+ uint64_t b = arg_info(op->args[3])->val;
46
similarity index 100%
28
+ uint64_t h, l;
47
rename from tcg/aarch64/tcg-target.opc.h
29
TCGArg rl, rh;
48
rename to tcg/aarch64/tcg-target-opc.h.inc
30
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
49
diff --git a/tcg/arm/tcg-target.opc.h b/tcg/arm/tcg-target-opc.h.inc
31
+ TCGOp *op2;
50
similarity index 100%
32
+
51
rename from tcg/arm/tcg-target.opc.h
33
+ switch (op->opc) {
52
rename to tcg/arm/tcg-target-opc.h.inc
34
+ case INDEX_op_mulu2_i32:
53
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target-opc.h.inc
35
+ l = (uint64_t)(uint32_t)a * (uint32_t)b;
54
similarity index 100%
36
+ h = (int32_t)(l >> 32);
55
rename from tcg/i386/tcg-target.opc.h
37
+ l = (int32_t)l;
56
rename to tcg/i386/tcg-target-opc.h.inc
38
+ break;
57
diff --git a/tcg/loongarch64/tcg-target.opc.h b/tcg/loongarch64/tcg-target-opc.h.inc
39
+ case INDEX_op_muls2_i32:
58
similarity index 100%
40
+ l = (int64_t)(int32_t)a * (int32_t)b;
59
rename from tcg/loongarch64/tcg-target.opc.h
41
+ h = l >> 32;
60
rename to tcg/loongarch64/tcg-target-opc.h.inc
42
+ l = (int32_t)l;
61
diff --git a/tcg/mips/tcg-target-opc.h.inc b/tcg/mips/tcg-target-opc.h.inc
43
+ break;
62
new file mode 100644
44
+ case INDEX_op_mulu2_i64:
63
index XXXXXXX..XXXXXXX
45
+ mulu64(&l, &h, a, b);
64
--- /dev/null
46
+ break;
65
+++ b/tcg/mips/tcg-target-opc.h.inc
47
+ case INDEX_op_muls2_i64:
66
@@ -0,0 +1 @@
48
+ muls64(&l, &h, a, b);
67
+/* No target specific opcodes. */
49
+ break;
68
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target-opc.h.inc
50
+ default:
69
similarity index 100%
51
+ g_assert_not_reached();
70
rename from tcg/ppc/tcg-target.opc.h
52
+ }
71
rename to tcg/ppc/tcg-target-opc.h.inc
53
72
diff --git a/tcg/riscv/tcg-target.opc.h b/tcg/riscv/tcg-target-opc.h.inc
54
rl = op->args[0];
73
similarity index 100%
55
rh = op->args[1];
74
rename from tcg/riscv/tcg-target.opc.h
56
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
75
rename to tcg/riscv/tcg-target-opc.h.inc
57
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
76
diff --git a/tcg/s390x/tcg-target.opc.h b/tcg/s390x/tcg-target-opc.h.inc
58
+
77
similarity index 100%
59
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
78
rename from tcg/s390x/tcg-target.opc.h
60
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
79
rename to tcg/s390x/tcg-target-opc.h.inc
61
+
80
diff --git a/tcg/sparc64/tcg-target-opc.h.inc b/tcg/sparc64/tcg-target-opc.h.inc
62
+ tcg_opt_gen_movi(ctx, op, rl, l);
81
new file mode 100644
63
+ tcg_opt_gen_movi(ctx, op2, rh, h);
82
index XXXXXXX..XXXXXXX
64
return true;
83
--- /dev/null
65
}
84
+++ b/tcg/sparc64/tcg-target-opc.h.inc
66
return false;
85
@@ -0,0 +1 @@
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
+/* No target specific opcodes. */
68
CASE_OP_32_64(muluh):
87
diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc
69
done = fold_mul_highpart(&ctx, op);
88
new file mode 100644
70
break;
89
index XXXXXXX..XXXXXXX
71
- case INDEX_op_mulu2_i32:
90
--- /dev/null
72
- done = fold_mulu2_i32(&ctx, op);
91
+++ b/tcg/tci/tcg-target-opc.h.inc
73
+ CASE_OP_32_64(muls2):
92
@@ -0,0 +1 @@
74
+ CASE_OP_32_64(mulu2):
93
+/* No target specific opcodes. */
75
+ done = fold_multiply2(&ctx, op);
76
break;
77
CASE_OP_32_64(nand):
78
done = fold_nand(&ctx, op);
79
--
94
--
80
2.25.1
95
2.43.0
81
96
82
97
diff view generated by jsdifflib
1
Pull the "op r, 0, b => movi r, 0" optimization into a function,
1
Now that tcg-target-opc.h.inc is unconditional,
2
and use it in fold_shift.
2
we can move these out of the generic header.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 28 ++++++++++------------------
7
include/tcg/tcg-opc.h | 6 ------
9
1 file changed, 10 insertions(+), 18 deletions(-)
8
tcg/tci/tcg-target-opc.h.inc | 5 ++++-
9
2 files changed, 4 insertions(+), 7 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/include/tcg/tcg-opc.h
14
+++ b/tcg/optimize.c
14
+++ b/include/tcg/tcg-opc.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
15
@@ -XXX,XX +XXX,XX @@ DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
16
return false;
16
17
}
17
#include "tcg-target-opc.h.inc"
18
18
19
+/* If the binary operation has first argument @i, fold to @i. */
19
-#ifdef TCG_TARGET_INTERPRETER
20
+static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
20
-/* These opcodes are only for use between the tci generator and interpreter. */
21
+{
21
-DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
22
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
22
-DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
-#endif
24
+ }
25
+ return false;
26
+}
27
+
28
/* If the binary operation has first argument @i, fold to NOT. */
29
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
30
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
32
static bool fold_shift(OptContext *ctx, TCGOp *op)
33
{
34
if (fold_const2(ctx, op) ||
35
+ fold_ix_to_i(ctx, op, 0) ||
36
fold_xi_to_x(ctx, op, 0)) {
37
return true;
38
}
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
40
break;
41
}
42
43
- /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
44
- and "sub r, 0, a => neg r, a" case. */
45
- switch (opc) {
46
- CASE_OP_32_64(shl):
47
- CASE_OP_32_64(shr):
48
- CASE_OP_32_64(sar):
49
- CASE_OP_32_64(rotl):
50
- CASE_OP_32_64(rotr):
51
- if (arg_is_const(op->args[1])
52
- && arg_info(op->args[1])->val == 0) {
53
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
54
- continue;
55
- }
56
- break;
57
- default:
58
- break;
59
- }
60
-
24
-
61
/* Simplify using known-zero bits. Currently only ops with a single
25
#undef DATA64_ARGS
62
output argument is supported. */
26
#undef IMPL
63
z_mask = -1;
27
#undef IMPL64
28
diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc
29
index XXXXXXX..XXXXXXX 100644
30
--- a/tcg/tci/tcg-target-opc.h.inc
31
+++ b/tcg/tci/tcg-target-opc.h.inc
32
@@ -1 +1,4 @@
33
-/* No target specific opcodes. */
34
+/* SPDX-License-Identifier: MIT */
35
+/* These opcodes for use between the tci generator and interpreter. */
36
+DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
37
+DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
64
--
38
--
65
2.25.1
39
2.43.0
66
40
67
41
diff view generated by jsdifflib
1
Pull the "op r, a, 0 => movi r, 0" optimization into a function,
1
Don't reference TCG_TARGET_MAYBE_vec in a public header.
2
and use it in the outer opcode fold functions.
3
2
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/optimize.c | 38 ++++++++++++++++++++------------------
6
include/tcg/tcg.h | 7 -------
9
1 file changed, 20 insertions(+), 18 deletions(-)
7
tcg/tcg.c | 4 ++++
8
2 files changed, 4 insertions(+), 7 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/include/tcg/tcg.h
14
+++ b/tcg/optimize.c
13
+++ b/include/tcg/tcg.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ extern tcg_prologue_fn *tcg_qemu_tb_exec;
16
return false;
15
16
void tcg_register_jit(const void *buf, size_t buf_size);
17
18
-#if TCG_TARGET_MAYBE_vec
19
/* Return zero if the tuple (opc, type, vece) is unsupportable;
20
return > 0 if it is directly supportable;
21
return < 0 if we must call tcg_expand_vec_op. */
22
int tcg_can_emit_vec_op(TCGOpcode, TCGType, unsigned);
23
-#else
24
-static inline int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
25
-{
26
- return 0;
27
-}
28
-#endif
29
30
/* Expand the tuple (opc, type, vece) on the given arguments. */
31
void tcg_expand_vec_op(TCGOpcode, TCGType, unsigned, TCGArg, ...);
32
diff --git a/tcg/tcg.c b/tcg/tcg.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/tcg.c
35
+++ b/tcg/tcg.c
36
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
37
{
38
g_assert_not_reached();
17
}
39
}
18
40
+int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
19
+/* If the binary operation has second argument @i, fold to @i. */
20
+static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
21
+{
41
+{
22
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
42
+ return 0;
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
25
+ return false;
26
+}
43
+}
27
+
44
#endif
28
/* If the binary operation has both arguments equal, fold to @i. */
45
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
29
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
46
intptr_t arg2);
30
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
32
static bool fold_and(OptContext *ctx, TCGOp *op)
33
{
34
if (fold_const2(ctx, op) ||
35
+ fold_xi_to_i(ctx, op, 0) ||
36
fold_xx_to_x(ctx, op)) {
37
return true;
38
}
39
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
40
41
static bool fold_mul(OptContext *ctx, TCGOp *op)
42
{
43
- return fold_const2(ctx, op);
44
+ if (fold_const2(ctx, op) ||
45
+ fold_xi_to_i(ctx, op, 0)) {
46
+ return true;
47
+ }
48
+ return false;
49
}
50
51
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
52
{
53
- return fold_const2(ctx, op);
54
+ if (fold_const2(ctx, op) ||
55
+ fold_xi_to_i(ctx, op, 0)) {
56
+ return true;
57
+ }
58
+ return false;
59
}
60
61
static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
63
continue;
64
}
65
66
- /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
67
- switch (opc) {
68
- CASE_OP_32_64_VEC(and):
69
- CASE_OP_32_64_VEC(mul):
70
- CASE_OP_32_64(muluh):
71
- CASE_OP_32_64(mulsh):
72
- if (arg_is_const(op->args[2])
73
- && arg_info(op->args[2])->val == 0) {
74
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
75
- continue;
76
- }
77
- break;
78
- default:
79
- break;
80
- }
81
-
82
/*
83
* Process each opcode.
84
* Sorted alphabetically by opcode as much as possible.
85
--
47
--
86
2.25.1
48
2.43.0
87
49
88
50
diff view generated by jsdifflib
New patch
1
Left-over from commit 623d7e3551a ("util: Add cpuinfo-ppc.c").
1
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-ID: <20250108215156.8731-2-philmd@linaro.org>
6
---
7
tcg/ppc/tcg-target.h | 8 --------
8
1 file changed, 8 deletions(-)
9
10
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.h
13
+++ b/tcg/ppc/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@ typedef enum {
15
TCG_AREG0 = TCG_REG_R27
16
} TCGReg;
17
18
-typedef enum {
19
- tcg_isa_base,
20
- tcg_isa_2_06,
21
- tcg_isa_2_07,
22
- tcg_isa_3_00,
23
- tcg_isa_3_10,
24
-} TCGPowerISA;
25
-
26
#define have_isa_2_06 (cpuinfo & CPUINFO_V2_06)
27
#define have_isa_2_07 (cpuinfo & CPUINFO_V2_07)
28
#define have_isa_3_00 (cpuinfo & CPUINFO_V3_0)
29
--
30
2.43.0
31
32
diff view generated by jsdifflib
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Message-ID: <20250108215156.8731-3-philmd@linaro.org>
4
---
5
include/tcg/tcg.h | 105 +-----------------------------------------
6
tcg/tcg-has.h | 115 ++++++++++++++++++++++++++++++++++++++++++++++
7
2 files changed, 116 insertions(+), 104 deletions(-)
8
create mode 100644 tcg/tcg-has.h
1
9
10
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg.h
13
+++ b/include/tcg/tcg.h
14
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
15
#error unsupported
16
#endif
17
18
-#if TCG_TARGET_REG_BITS == 32
19
-/* Turn some undef macros into false macros. */
20
-#define TCG_TARGET_HAS_extr_i64_i32 0
21
-#define TCG_TARGET_HAS_div_i64 0
22
-#define TCG_TARGET_HAS_rem_i64 0
23
-#define TCG_TARGET_HAS_div2_i64 0
24
-#define TCG_TARGET_HAS_rot_i64 0
25
-#define TCG_TARGET_HAS_ext8s_i64 0
26
-#define TCG_TARGET_HAS_ext16s_i64 0
27
-#define TCG_TARGET_HAS_ext32s_i64 0
28
-#define TCG_TARGET_HAS_ext8u_i64 0
29
-#define TCG_TARGET_HAS_ext16u_i64 0
30
-#define TCG_TARGET_HAS_ext32u_i64 0
31
-#define TCG_TARGET_HAS_bswap16_i64 0
32
-#define TCG_TARGET_HAS_bswap32_i64 0
33
-#define TCG_TARGET_HAS_bswap64_i64 0
34
-#define TCG_TARGET_HAS_not_i64 0
35
-#define TCG_TARGET_HAS_andc_i64 0
36
-#define TCG_TARGET_HAS_orc_i64 0
37
-#define TCG_TARGET_HAS_eqv_i64 0
38
-#define TCG_TARGET_HAS_nand_i64 0
39
-#define TCG_TARGET_HAS_nor_i64 0
40
-#define TCG_TARGET_HAS_clz_i64 0
41
-#define TCG_TARGET_HAS_ctz_i64 0
42
-#define TCG_TARGET_HAS_ctpop_i64 0
43
-#define TCG_TARGET_HAS_deposit_i64 0
44
-#define TCG_TARGET_HAS_extract_i64 0
45
-#define TCG_TARGET_HAS_sextract_i64 0
46
-#define TCG_TARGET_HAS_extract2_i64 0
47
-#define TCG_TARGET_HAS_negsetcond_i64 0
48
-#define TCG_TARGET_HAS_add2_i64 0
49
-#define TCG_TARGET_HAS_sub2_i64 0
50
-#define TCG_TARGET_HAS_mulu2_i64 0
51
-#define TCG_TARGET_HAS_muls2_i64 0
52
-#define TCG_TARGET_HAS_muluh_i64 0
53
-#define TCG_TARGET_HAS_mulsh_i64 0
54
-/* Turn some undef macros into true macros. */
55
-#define TCG_TARGET_HAS_add2_i32 1
56
-#define TCG_TARGET_HAS_sub2_i32 1
57
-#endif
58
-
59
-#ifndef TCG_TARGET_deposit_i32_valid
60
-#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
61
-#endif
62
-#ifndef TCG_TARGET_deposit_i64_valid
63
-#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
64
-#endif
65
-#ifndef TCG_TARGET_extract_i32_valid
66
-#define TCG_TARGET_extract_i32_valid(ofs, len) 1
67
-#endif
68
-#ifndef TCG_TARGET_extract_i64_valid
69
-#define TCG_TARGET_extract_i64_valid(ofs, len) 1
70
-#endif
71
-
72
-/* Only one of DIV or DIV2 should be defined. */
73
-#if defined(TCG_TARGET_HAS_div_i32)
74
-#define TCG_TARGET_HAS_div2_i32 0
75
-#elif defined(TCG_TARGET_HAS_div2_i32)
76
-#define TCG_TARGET_HAS_div_i32 0
77
-#define TCG_TARGET_HAS_rem_i32 0
78
-#endif
79
-#if defined(TCG_TARGET_HAS_div_i64)
80
-#define TCG_TARGET_HAS_div2_i64 0
81
-#elif defined(TCG_TARGET_HAS_div2_i64)
82
-#define TCG_TARGET_HAS_div_i64 0
83
-#define TCG_TARGET_HAS_rem_i64 0
84
-#endif
85
-
86
-#if !defined(TCG_TARGET_HAS_v64) \
87
- && !defined(TCG_TARGET_HAS_v128) \
88
- && !defined(TCG_TARGET_HAS_v256)
89
-#define TCG_TARGET_MAYBE_vec 0
90
-#define TCG_TARGET_HAS_abs_vec 0
91
-#define TCG_TARGET_HAS_neg_vec 0
92
-#define TCG_TARGET_HAS_not_vec 0
93
-#define TCG_TARGET_HAS_andc_vec 0
94
-#define TCG_TARGET_HAS_orc_vec 0
95
-#define TCG_TARGET_HAS_nand_vec 0
96
-#define TCG_TARGET_HAS_nor_vec 0
97
-#define TCG_TARGET_HAS_eqv_vec 0
98
-#define TCG_TARGET_HAS_roti_vec 0
99
-#define TCG_TARGET_HAS_rots_vec 0
100
-#define TCG_TARGET_HAS_rotv_vec 0
101
-#define TCG_TARGET_HAS_shi_vec 0
102
-#define TCG_TARGET_HAS_shs_vec 0
103
-#define TCG_TARGET_HAS_shv_vec 0
104
-#define TCG_TARGET_HAS_mul_vec 0
105
-#define TCG_TARGET_HAS_sat_vec 0
106
-#define TCG_TARGET_HAS_minmax_vec 0
107
-#define TCG_TARGET_HAS_bitsel_vec 0
108
-#define TCG_TARGET_HAS_cmpsel_vec 0
109
-#define TCG_TARGET_HAS_tst_vec 0
110
-#else
111
-#define TCG_TARGET_MAYBE_vec 1
112
-#endif
113
-#ifndef TCG_TARGET_HAS_v64
114
-#define TCG_TARGET_HAS_v64 0
115
-#endif
116
-#ifndef TCG_TARGET_HAS_v128
117
-#define TCG_TARGET_HAS_v128 0
118
-#endif
119
-#ifndef TCG_TARGET_HAS_v256
120
-#define TCG_TARGET_HAS_v256 0
121
-#endif
122
+#include "tcg/tcg-has.h"
123
124
typedef enum TCGOpcode {
125
#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
126
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
127
new file mode 100644
128
index XXXXXXX..XXXXXXX
129
--- /dev/null
130
+++ b/tcg/tcg-has.h
131
@@ -XXX,XX +XXX,XX @@
132
+/* SPDX-License-Identifier: MIT */
133
+/*
134
+ * Define target-specific opcode support
135
+ * Copyright (c) 2024 Linaro, Ltd.
136
+ */
137
+
138
+#ifndef TCG_HAS_H
139
+#define TCG_HAS_H
140
+
141
+#if TCG_TARGET_REG_BITS == 32
142
+/* Turn some undef macros into false macros. */
143
+#define TCG_TARGET_HAS_extr_i64_i32 0
144
+#define TCG_TARGET_HAS_div_i64 0
145
+#define TCG_TARGET_HAS_rem_i64 0
146
+#define TCG_TARGET_HAS_div2_i64 0
147
+#define TCG_TARGET_HAS_rot_i64 0
148
+#define TCG_TARGET_HAS_ext8s_i64 0
149
+#define TCG_TARGET_HAS_ext16s_i64 0
150
+#define TCG_TARGET_HAS_ext32s_i64 0
151
+#define TCG_TARGET_HAS_ext8u_i64 0
152
+#define TCG_TARGET_HAS_ext16u_i64 0
153
+#define TCG_TARGET_HAS_ext32u_i64 0
154
+#define TCG_TARGET_HAS_bswap16_i64 0
155
+#define TCG_TARGET_HAS_bswap32_i64 0
156
+#define TCG_TARGET_HAS_bswap64_i64 0
157
+#define TCG_TARGET_HAS_not_i64 0
158
+#define TCG_TARGET_HAS_andc_i64 0
159
+#define TCG_TARGET_HAS_orc_i64 0
160
+#define TCG_TARGET_HAS_eqv_i64 0
161
+#define TCG_TARGET_HAS_nand_i64 0
162
+#define TCG_TARGET_HAS_nor_i64 0
163
+#define TCG_TARGET_HAS_clz_i64 0
164
+#define TCG_TARGET_HAS_ctz_i64 0
165
+#define TCG_TARGET_HAS_ctpop_i64 0
166
+#define TCG_TARGET_HAS_deposit_i64 0
167
+#define TCG_TARGET_HAS_extract_i64 0
168
+#define TCG_TARGET_HAS_sextract_i64 0
169
+#define TCG_TARGET_HAS_extract2_i64 0
170
+#define TCG_TARGET_HAS_negsetcond_i64 0
171
+#define TCG_TARGET_HAS_add2_i64 0
172
+#define TCG_TARGET_HAS_sub2_i64 0
173
+#define TCG_TARGET_HAS_mulu2_i64 0
174
+#define TCG_TARGET_HAS_muls2_i64 0
175
+#define TCG_TARGET_HAS_muluh_i64 0
176
+#define TCG_TARGET_HAS_mulsh_i64 0
177
+/* Turn some undef macros into true macros. */
178
+#define TCG_TARGET_HAS_add2_i32 1
179
+#define TCG_TARGET_HAS_sub2_i32 1
180
+#endif
181
+
182
+#ifndef TCG_TARGET_deposit_i32_valid
183
+#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
184
+#endif
185
+#ifndef TCG_TARGET_deposit_i64_valid
186
+#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
187
+#endif
188
+#ifndef TCG_TARGET_extract_i32_valid
189
+#define TCG_TARGET_extract_i32_valid(ofs, len) 1
190
+#endif
191
+#ifndef TCG_TARGET_extract_i64_valid
192
+#define TCG_TARGET_extract_i64_valid(ofs, len) 1
193
+#endif
194
+
195
+/* Only one of DIV or DIV2 should be defined. */
196
+#if defined(TCG_TARGET_HAS_div_i32)
197
+#define TCG_TARGET_HAS_div2_i32 0
198
+#elif defined(TCG_TARGET_HAS_div2_i32)
199
+#define TCG_TARGET_HAS_div_i32 0
200
+#define TCG_TARGET_HAS_rem_i32 0
201
+#endif
202
+#if defined(TCG_TARGET_HAS_div_i64)
203
+#define TCG_TARGET_HAS_div2_i64 0
204
+#elif defined(TCG_TARGET_HAS_div2_i64)
205
+#define TCG_TARGET_HAS_div_i64 0
206
+#define TCG_TARGET_HAS_rem_i64 0
207
+#endif
208
+
209
+#if !defined(TCG_TARGET_HAS_v64) \
210
+ && !defined(TCG_TARGET_HAS_v128) \
211
+ && !defined(TCG_TARGET_HAS_v256)
212
+#define TCG_TARGET_MAYBE_vec 0
213
+#define TCG_TARGET_HAS_abs_vec 0
214
+#define TCG_TARGET_HAS_neg_vec 0
215
+#define TCG_TARGET_HAS_not_vec 0
216
+#define TCG_TARGET_HAS_andc_vec 0
217
+#define TCG_TARGET_HAS_orc_vec 0
218
+#define TCG_TARGET_HAS_nand_vec 0
219
+#define TCG_TARGET_HAS_nor_vec 0
220
+#define TCG_TARGET_HAS_eqv_vec 0
221
+#define TCG_TARGET_HAS_roti_vec 0
222
+#define TCG_TARGET_HAS_rots_vec 0
223
+#define TCG_TARGET_HAS_rotv_vec 0
224
+#define TCG_TARGET_HAS_shi_vec 0
225
+#define TCG_TARGET_HAS_shs_vec 0
226
+#define TCG_TARGET_HAS_shv_vec 0
227
+#define TCG_TARGET_HAS_mul_vec 0
228
+#define TCG_TARGET_HAS_sat_vec 0
229
+#define TCG_TARGET_HAS_minmax_vec 0
230
+#define TCG_TARGET_HAS_bitsel_vec 0
231
+#define TCG_TARGET_HAS_cmpsel_vec 0
232
+#define TCG_TARGET_HAS_tst_vec 0
233
+#else
234
+#define TCG_TARGET_MAYBE_vec 1
235
+#endif
236
+#ifndef TCG_TARGET_HAS_v64
237
+#define TCG_TARGET_HAS_v64 0
238
+#endif
239
+#ifndef TCG_TARGET_HAS_v128
240
+#define TCG_TARGET_HAS_v128 0
241
+#endif
242
+#ifndef TCG_TARGET_HAS_v256
243
+#define TCG_TARGET_HAS_v256 0
244
+#endif
245
+
246
+#endif
247
--
248
2.43.0
249
250
diff view generated by jsdifflib
New patch
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-4-philmd@linaro.org>
---
tcg/aarch64/tcg-target-has.h | 119 +++++++++++++++++++++++++++++++++++
tcg/aarch64/tcg-target.h | 109 +-------------------------------
2 files changed, 120 insertions(+), 108 deletions(-)
create mode 100644 tcg/aarch64/tcg-target-has.h

diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/aarch64/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#include "host/cpuinfo.h"
+
+#define have_lse (cpuinfo & CPUINFO_LSE)
+#define have_lse2 (cpuinfo & CPUINFO_LSE2)
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
+#define TCG_TARGET_HAS_extract2_i32 1
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_extr_i64_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 1
+#define TCG_TARGET_HAS_extract2_i64 1
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+
+/*
+ * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
+ * which requires writable pages. We must defer to the helper for user-only,
+ * but in system mode all ram is writable for the host.
+ */
+#ifdef CONFIG_USER_ONLY
+#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
+#else
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
+#endif
+
+#define TCG_TARGET_HAS_tst 1
+
+#define TCG_TARGET_HAS_v64 1
+#define TCG_TARGET_HAS_v128 1
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 0
+#define TCG_TARGET_HAS_tst_vec 1
+
+#endif
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#ifndef AARCH64_TCG_TARGET_H
#define AARCH64_TCG_TARGET_H

-#include "host/cpuinfo.h"
-
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

@@ -XXX,XX +XXX,XX @@ typedef enum {

#define TCG_TARGET_NB_REGS 64

-#define have_lse (cpuinfo & CPUINFO_LSE)
-#define have_lse2 (cpuinfo & CPUINFO_LSE2)
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
-#define TCG_TARGET_HAS_eqv_i32 1
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
-#define TCG_TARGET_HAS_extract2_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 0
-#define TCG_TARGET_HAS_muls2_i32 0
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_extr_i64_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_andc_i64 1
-#define TCG_TARGET_HAS_orc_i64 1
-#define TCG_TARGET_HAS_eqv_i64 1
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_clz_i64 1
-#define TCG_TARGET_HAS_ctz_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 0
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
-#define TCG_TARGET_HAS_extract2_i64 1
-#define TCG_TARGET_HAS_negsetcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 0
-#define TCG_TARGET_HAS_muls2_i64 0
-#define TCG_TARGET_HAS_muluh_i64 1
-#define TCG_TARGET_HAS_mulsh_i64 1
-
-/*
- * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
- * which requires writable pages. We must defer to the helper for user-only,
- * but in system mode all ram is writable for the host.
- */
-#ifdef CONFIG_USER_ONLY
-#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
-#else
-#define TCG_TARGET_HAS_qemu_ldst_i128 1
-#endif
-
-#define TCG_TARGET_HAS_tst 1
-
-#define TCG_TARGET_HAS_v64 1
-#define TCG_TARGET_HAS_v128 1
-#define TCG_TARGET_HAS_v256 0
-
-#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec 1
-#define TCG_TARGET_HAS_nand_vec 0
-#define TCG_TARGET_HAS_nor_vec 0
-#define TCG_TARGET_HAS_eqv_vec 0
-#define TCG_TARGET_HAS_not_vec 1
-#define TCG_TARGET_HAS_neg_vec 1
-#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec 0
-#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec 0
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 0
-#define TCG_TARGET_HAS_shv_vec 1
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_sat_vec 1
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 1
-#define TCG_TARGET_HAS_cmpsel_vec 0
-#define TCG_TARGET_HAS_tst_vec 1
+#include "tcg-target-has.h"

#define TCG_TARGET_DEFAULT_MO (0)

--
2.43.0
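
The CONFIG_USER_ONLY split in the aarch64 header above encodes the reasoning from its comment: LDXP+STXP needs writable pages, which only system mode can guarantee, so user-only builds advertise native 128-bit load/store only when FEAT_LSE2 was detected. A compilable sketch of that shape (CONFIG_USER_ONLY and the cpuinfo bit here are stand-ins, not the real build machinery):

#include <stdio.h>

#define CONFIG_USER_ONLY 1          /* pretend this is a user-only build */

static unsigned cpuinfo;            /* filled once by host probing */
#define CPUINFO_LSE2 (1u << 0)      /* hypothetical bit assignment */
#define have_lse2 (cpuinfo & CPUINFO_LSE2)

#ifdef CONFIG_USER_ONLY
#define HAS_QEMU_LDST_I128 have_lse2   /* guest pages may be read-only */
#else
#define HAS_QEMU_LDST_I128 1           /* system-mode RAM is host-writable */
#endif

int main(void)
{
    cpuinfo = CPUINFO_LSE2;         /* pretend the probe found FEAT_LSE2 */
    printf("native i128 ldst: %d\n", HAS_QEMU_LDST_I128 ? 1 : 0);
    return 0;
}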
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-5-philmd@linaro.org>
---
tcg/arm/tcg-target-has.h | 85 ++++++++++++++++++++++++++++++++++++++++
tcg/arm/tcg-target.h | 74 +---------------------------------
2 files changed, 86 insertions(+), 73 deletions(-)
create mode 100644 tcg/arm/tcg-target-has.h

diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/arm/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2008 Fabrice Bellard
+ * Copyright (c) 2008 Andrzej Zaborowski
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+extern int arm_arch;
+
+#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+#define use_idiv_instructions 1
+#else
+extern bool use_idiv_instructions;
+#endif
+#ifdef __ARM_NEON__
+#define use_neon_instructions 1
+#else
+extern bool use_neon_instructions;
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 0 /* and r0, r1, #0xff */
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_extract2_i32 1
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
+#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+
+#define TCG_TARGET_HAS_tst 1
+
+#define TCG_TARGET_HAS_v64 use_neon_instructions
+#define TCG_TARGET_HAS_v128 use_neon_instructions
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 0
+#define TCG_TARGET_HAS_tst_vec 1
+
+#endif
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#ifndef ARM_TCG_TARGET_H
#define ARM_TCG_TARGET_H

-extern int arm_arch;
-
-#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
-
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX

@@ -XXX,XX +XXX,XX @@ typedef enum {

#define TCG_TARGET_NB_REGS 32

-#ifdef __ARM_ARCH_EXT_IDIV__
-#define use_idiv_instructions 1
-#else
-extern bool use_idiv_instructions;
-#endif
-#ifdef __ARM_NEON__
-#define use_neon_instructions 1
-#else
-extern bool use_neon_instructions;
-#endif
-
-/* optional instructions */
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 0 /* and r0, r1, #0xff */
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_orc_i32 0
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_extract2_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 1
-#define TCG_TARGET_HAS_muls2_i32 1
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
-#define TCG_TARGET_HAS_rem_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
-
-#define TCG_TARGET_HAS_tst 1
-
-#define TCG_TARGET_HAS_v64 use_neon_instructions
-#define TCG_TARGET_HAS_v128 use_neon_instructions
-#define TCG_TARGET_HAS_v256 0
-
-#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec 1
-#define TCG_TARGET_HAS_nand_vec 0
-#define TCG_TARGET_HAS_nor_vec 0
-#define TCG_TARGET_HAS_eqv_vec 0
-#define TCG_TARGET_HAS_not_vec 1
-#define TCG_TARGET_HAS_neg_vec 1
-#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec 0
-#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec 0
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 0
-#define TCG_TARGET_HAS_shv_vec 0
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_sat_vec 1
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 1
-#define TCG_TARGET_HAS_cmpsel_vec 0
-#define TCG_TARGET_HAS_tst_vec 1
+#include "tcg-target-has.h"

#define TCG_TARGET_DEFAULT_MO (0)

--
2.43.0
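
The use_armv7_instructions definition in the arm header above folds to a compile-time constant whenever the compiler already targets ARMv7 (__ARM_ARCH >= 7), and otherwise degrades to a runtime test of arm_arch. A standalone sketch of that idiom (the fallback value of __ARM_ARCH is only so the example builds on non-ARM hosts):

#include <stdio.h>

#ifndef __ARM_ARCH
#define __ARM_ARCH 5                /* stand-in when not compiling for ARM */
#endif

static int arm_arch = 7;            /* would be set by runtime probing */

#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)

int main(void)
{
    /* Constant-folds on an ARMv7+ build; otherwise reads arm_arch. */
    printf("armv7 insns: %d\n", use_armv7_instructions ? 1 : 0);
    return 0;
}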
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-6-philmd@linaro.org>
---
tcg/i386/tcg-target-has.h | 139 ++++++++++++++++++++++++++++++++++++++
tcg/i386/tcg-target.h | 129 +----------------------------------
2 files changed, 140 insertions(+), 128 deletions(-)
create mode 100644 tcg/i386/tcg-target-has.h

diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/i386/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#include "host/cpuinfo.h"
+
+#define have_bmi1 (cpuinfo & CPUINFO_BMI1)
+#define have_popcnt (cpuinfo & CPUINFO_POPCNT)
+#define have_avx1 (cpuinfo & CPUINFO_AVX1)
+#define have_avx2 (cpuinfo & CPUINFO_AVX2)
+#define have_movbe (cpuinfo & CPUINFO_MOVBE)
+
+/*
+ * There are interesting instructions in AVX512, so long as we have AVX512VL,
+ * which indicates support for EVEX on sizes smaller than 512 bits.
+ */
+#define have_avx512vl ((cpuinfo & CPUINFO_AVX512VL) && \
+ (cpuinfo & CPUINFO_AVX512F))
+#define have_avx512bw ((cpuinfo & CPUINFO_AVX512BW) && have_avx512vl)
+#define have_avx512dq ((cpuinfo & CPUINFO_AVX512DQ) && have_avx512vl)
+#define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl)
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div2_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 have_bmi1
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
+#define TCG_TARGET_HAS_extract2_i32 1
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+/* Keep 32-bit values zero-extended in a register. */
+#define TCG_TARGET_HAS_extr_i64_i32 1
+#define TCG_TARGET_HAS_div2_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 have_bmi1
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 1
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 1
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+#else
+#define TCG_TARGET_HAS_qemu_st8_i32 1
+#endif
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
+ (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA))
+
+#define TCG_TARGET_HAS_tst 1
+
+/* We do not support older SSE systems, only beginning with AVX1. */
+#define TCG_TARGET_HAS_v64 have_avx1
+#define TCG_TARGET_HAS_v128 have_avx1
+#define TCG_TARGET_HAS_v256 have_avx2
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec have_avx512vl
+#define TCG_TARGET_HAS_nand_vec have_avx512vl
+#define TCG_TARGET_HAS_nor_vec have_avx512vl
+#define TCG_TARGET_HAS_eqv_vec have_avx512vl
+#define TCG_TARGET_HAS_not_vec have_avx512vl
+#define TCG_TARGET_HAS_neg_vec 0
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec have_avx512vl
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec have_avx512vl
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 1
+#define TCG_TARGET_HAS_shv_vec have_avx2
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec have_avx512vl
+#define TCG_TARGET_HAS_cmpsel_vec 1
+#define TCG_TARGET_HAS_tst_vec have_avx512bw
+
+#define TCG_TARGET_deposit_i32_valid(ofs, len) \
+ (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
+ (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
+#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
+
+/* Check for the possibility of high-byte extraction and, for 64-bit,
+ zero-extending 32-bit right-shift. */
+#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
+#define TCG_TARGET_extract_i64_valid(ofs, len) \
+ (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
+
+#endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#ifndef I386_TCG_TARGET_H
#define I386_TCG_TARGET_H

-#include "host/cpuinfo.h"
-
#define TCG_TARGET_INSN_UNIT_SIZE 1

#ifdef __x86_64__
@@ -XXX,XX +XXX,XX @@ typedef enum {
TCG_REG_CALL_STACK = TCG_REG_ESP
} TCGReg;

-#define have_bmi1 (cpuinfo & CPUINFO_BMI1)
-#define have_popcnt (cpuinfo & CPUINFO_POPCNT)
-#define have_avx1 (cpuinfo & CPUINFO_AVX1)
-#define have_avx2 (cpuinfo & CPUINFO_AVX2)
-#define have_movbe (cpuinfo & CPUINFO_MOVBE)
-
-/*
- * There are interesting instructions in AVX512, so long as we have AVX512VL,
- * which indicates support for EVEX on sizes smaller than 512 bits.
- */
-#define have_avx512vl ((cpuinfo & CPUINFO_AVX512VL) && \
- (cpuinfo & CPUINFO_AVX512F))
-#define have_avx512bw ((cpuinfo & CPUINFO_AVX512BW) && have_avx512vl)
-#define have_avx512dq ((cpuinfo & CPUINFO_AVX512DQ) && have_avx512vl)
-#define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl)
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div2_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 have_bmi1
-#define TCG_TARGET_HAS_orc_i32 0
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
-#define TCG_TARGET_HAS_extract2_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 1
-#define TCG_TARGET_HAS_muls2_i32 1
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-
-#if TCG_TARGET_REG_BITS == 64
-/* Keep 32-bit values zero-extended in a register. */
-#define TCG_TARGET_HAS_extr_i64_i32 1
-#define TCG_TARGET_HAS_div2_i64 1
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_andc_i64 have_bmi1
-#define TCG_TARGET_HAS_orc_i64 0
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_clz_i64 1
-#define TCG_TARGET_HAS_ctz_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 1
-#define TCG_TARGET_HAS_negsetcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 1
-#define TCG_TARGET_HAS_muls2_i64 1
-#define TCG_TARGET_HAS_muluh_i64 0
-#define TCG_TARGET_HAS_mulsh_i64 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-#else
-#define TCG_TARGET_HAS_qemu_st8_i32 1
-#endif
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 \
- (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA))
-
-#define TCG_TARGET_HAS_tst 1
-
-/* We do not support older SSE systems, only beginning with AVX1. */
-#define TCG_TARGET_HAS_v64 have_avx1
-#define TCG_TARGET_HAS_v128 have_avx1
-#define TCG_TARGET_HAS_v256 have_avx2
-
-#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec have_avx512vl
-#define TCG_TARGET_HAS_nand_vec have_avx512vl
-#define TCG_TARGET_HAS_nor_vec have_avx512vl
-#define TCG_TARGET_HAS_eqv_vec have_avx512vl
-#define TCG_TARGET_HAS_not_vec have_avx512vl
-#define TCG_TARGET_HAS_neg_vec 0
-#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec have_avx512vl
-#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec have_avx512vl
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 1
-#define TCG_TARGET_HAS_shv_vec have_avx2
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_sat_vec 1
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec have_avx512vl
-#define TCG_TARGET_HAS_cmpsel_vec 1
-#define TCG_TARGET_HAS_tst_vec have_avx512bw
-
-#define TCG_TARGET_deposit_i32_valid(ofs, len) \
- (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
- (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
-#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
-
-/* Check for the possibility of high-byte extraction and, for 64-bit,
- zero-extending 32-bit right-shift. */
-#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
-#define TCG_TARGET_extract_i64_valid(ofs, len) \
- (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
+#include "tcg-target-has.h"

/* This defines the natural memory order supported by this
* architecture before guarantees made by various barrier
--
2.43.0
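
Beyond plain 0/1 macros, the i386 header above also exports validity predicates: TCG_TARGET_deposit_i32_valid accepts only (ofs, len) pairs that map onto x86 partial-register stores. A small check of the quoted expression (the driver around it is illustrative; only the macro body comes from the patch):

#include <stdio.h>

#define TCG_TARGET_REG_BITS 64      /* pretend we build the 64-bit backend */

#define deposit_i32_valid(ofs, len) \
    (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
     (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))

int main(void)
{
    printf("%d\n", deposit_i32_valid(0, 8));   /* 1: byte store, e.g. %al */
    printf("%d\n", deposit_i32_valid(0, 16));  /* 1: word store, e.g. %ax */
    printf("%d\n", deposit_i32_valid(8, 8));   /* 0: %ah only on 32-bit */
    return 0;
}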
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-7-philmd@linaro.org>
---
tcg/loongarch64/tcg-target-has.h | 113 +++++++++++++++++++++++++++++++
tcg/loongarch64/tcg-target.h | 102 +---------------------------
2 files changed, 114 insertions(+), 101 deletions(-)
create mode 100644 tcg/loongarch64/tcg-target-has.h

diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/loongarch64/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2021 WANG Xuerui <git@xen0n.name>
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#include "host/cpuinfo.h"
+
+/* optional instructions */
+#define TCG_TARGET_HAS_negsetcond_i32 0
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_div2_i32 0
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 1
+#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_brcond2 0
+#define TCG_TARGET_HAS_setcond2 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+/* 64-bit operations */
+#define TCG_TARGET_HAS_negsetcond_i64 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_div2_i64 0
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_extr_i64_i32 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX)
+
+#define TCG_TARGET_HAS_tst 0
+
+#define TCG_TARGET_HAS_v64 (cpuinfo & CPUINFO_LSX)
+#define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_LSX)
+#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_LASX)
+
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 1
+#define TCG_TARGET_HAS_eqv_vec 0
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_roti_vec 1
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 0
+#define TCG_TARGET_HAS_tst_vec 0
+
+
+#endif
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#ifndef LOONGARCH_TCG_TARGET_H
#define LOONGARCH_TCG_TARGET_H

-#include "host/cpuinfo.h"
-
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_NB_REGS 64

@@ -XXX,XX +XXX,XX @@ typedef enum {
TCG_VEC_TMP0 = TCG_REG_V23,
} TCGReg;

-/* optional instructions */
-#define TCG_TARGET_HAS_negsetcond_i32 0
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_div2_i32 0
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 0
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_add2_i32 0
-#define TCG_TARGET_HAS_sub2_i32 0
-#define TCG_TARGET_HAS_mulu2_i32 0
-#define TCG_TARGET_HAS_muls2_i32 0
-#define TCG_TARGET_HAS_muluh_i32 1
-#define TCG_TARGET_HAS_mulsh_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_brcond2 0
-#define TCG_TARGET_HAS_setcond2 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-/* 64-bit operations */
-#define TCG_TARGET_HAS_negsetcond_i64 0
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 1
-#define TCG_TARGET_HAS_div2_i64 0
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_extr_i64_i32 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_andc_i64 1
-#define TCG_TARGET_HAS_orc_i64 1
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 1
-#define TCG_TARGET_HAS_clz_i64 1
-#define TCG_TARGET_HAS_ctz_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 0
-#define TCG_TARGET_HAS_add2_i64 0
-#define TCG_TARGET_HAS_sub2_i64 0
-#define TCG_TARGET_HAS_mulu2_i64 0
-#define TCG_TARGET_HAS_muls2_i64 0
-#define TCG_TARGET_HAS_muluh_i64 1
-#define TCG_TARGET_HAS_mulsh_i64 1
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX)
-
-#define TCG_TARGET_HAS_tst 0
-
-#define TCG_TARGET_HAS_v64 (cpuinfo & CPUINFO_LSX)
-#define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_LSX)
-#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_LASX)
-
-#define TCG_TARGET_HAS_not_vec 1
-#define TCG_TARGET_HAS_neg_vec 1
-#define TCG_TARGET_HAS_abs_vec 0
-#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec 1
-#define TCG_TARGET_HAS_nand_vec 0
-#define TCG_TARGET_HAS_nor_vec 1
-#define TCG_TARGET_HAS_eqv_vec 0
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 0
-#define TCG_TARGET_HAS_shv_vec 1
-#define TCG_TARGET_HAS_roti_vec 1
-#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec 1
-#define TCG_TARGET_HAS_sat_vec 1
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 1
-#define TCG_TARGET_HAS_cmpsel_vec 0
-#define TCG_TARGET_HAS_tst_vec 0
+#include "tcg-target-has.h"

#define TCG_TARGET_DEFAULT_MO (0)

--
2.43.0
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-8-philmd@linaro.org>
---
tcg/mips/tcg-target-has.h | 122 ++++++++++++++++++++++++++++++++++++++
tcg/mips/tcg-target.h | 112 +---------------------------------
2 files changed, 123 insertions(+), 111 deletions(-)
create mode 100644 tcg/mips/tcg-target-has.h

diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/mips/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+/* MOVN/MOVZ instructions detection */
+#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
+ defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
+ defined(_MIPS_ARCH_MIPS4)
+#define use_movnz_instructions 1
+#else
+extern bool use_movnz_instructions;
+#endif
+
+/* MIPS32 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1)
+#define use_mips32_instructions 1
+#else
+extern bool use_mips32_instructions;
+#endif
+
+/* MIPS32R2 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+#define use_mips32r2_instructions 1
+#else
+extern bool use_mips32r2_instructions;
+#endif
+
+/* MIPS32R6 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
+#define use_mips32r6_instructions 1
+#else
+#define use_mips32r6_instructions 0
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_andc_i32 0
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muluh_i32 1
+#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_negsetcond_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_extr_i64_i32 1
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_negsetcond_i64 0
+#endif
+
+/* optional instructions detected at runtime */
+#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
+#endif
+
+/* optional instructions automatically implemented */
+#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */
+#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */
+#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
+#endif
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+#define TCG_TARGET_HAS_tst 0
+
+#endif
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
TCG_AREG0 = TCG_REG_S8,
} TCGReg;

-/* MOVN/MOVZ instructions detection */
-#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
- defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
- defined(_MIPS_ARCH_MIPS4)
-#define use_movnz_instructions 1
-#else
-extern bool use_movnz_instructions;
-#endif
-
-/* MIPS32 instruction set detection */
-#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1)
-#define use_mips32_instructions 1
-#else
-extern bool use_mips32_instructions;
-#endif
-
-/* MIPS32R2 instruction set detection */
-#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
-#define use_mips32r2_instructions 1
-#else
-extern bool use_mips32r2_instructions;
-#endif
-
-/* MIPS32R6 instruction set detection */
-#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
-#define use_mips32r6_instructions 1
-#else
-#define use_mips32r6_instructions 0
-#endif
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_andc_i32 0
-#define TCG_TARGET_HAS_orc_i32 0
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions)
-#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
-#define TCG_TARGET_HAS_muluh_i32 1
-#define TCG_TARGET_HAS_mulsh_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 0
-
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_add2_i32 0
-#define TCG_TARGET_HAS_sub2_i32 0
-#define TCG_TARGET_HAS_extr_i64_i32 1
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 1
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_nor_i64 1
-#define TCG_TARGET_HAS_andc_i64 0
-#define TCG_TARGET_HAS_orc_i64 0
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_add2_i64 0
-#define TCG_TARGET_HAS_sub2_i64 0
-#define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions)
-#define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions)
-#define TCG_TARGET_HAS_muluh_i64 1
-#define TCG_TARGET_HAS_mulsh_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_negsetcond_i64 0
-#endif
-
-/* optional instructions detected at runtime */
-#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_sextract_i32 0
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_ctz_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_ctz_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 0
-#endif
-
-/* optional instructions automatically implemented */
-#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */
-#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */
-
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */
-#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
-#endif
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
-
-#define TCG_TARGET_HAS_tst 0
+#include "tcg-target-has.h"

#define TCG_TARGET_DEFAULT_MO 0

--
2.43.0
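
The mips header above resolves each ISA-revision flag at compile time when __mips_isa_rev guarantees it, and otherwise falls back to a variable filled in by runtime probing. A standalone sketch of that pattern (the extern of the original becomes a static here so the example links on its own):

#include <stdbool.h>
#include <stdio.h>

#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
#define use_mips32r2_instructions 1    /* constant: tests fold away */
#else
static bool use_mips32r2_instructions; /* set during backend startup */
#endif

int main(void)
{
#if !(defined(__mips_isa_rev) && (__mips_isa_rev >= 2))
    use_mips32r2_instructions = true;  /* pretend the probe succeeded */
#endif
    printf("bswap16 native: %d\n", use_mips32r2_instructions ? 1 : 0);
    return 0;
}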
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-9-philmd@linaro.org>
---
tcg/ppc/tcg-target-has.h | 124 +++++++++++++++++++++++++++++++++++++++
tcg/ppc/tcg-target.h | 114 +----------------------------
2 files changed, 125 insertions(+), 113 deletions(-)
create mode 100644 tcg/ppc/tcg-target-has.h

diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/ppc/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#include "host/cpuinfo.h"
+
+#define have_isa_2_06 (cpuinfo & CPUINFO_V2_06)
+#define have_isa_2_07 (cpuinfo & CPUINFO_V2_07)
+#define have_isa_3_00 (cpuinfo & CPUINFO_V3_0)
+#define have_isa_3_10 (cpuinfo & CPUINFO_V3_1)
+#define have_altivec (cpuinfo & CPUINFO_ALTIVEC)
+#define have_vsx (cpuinfo & CPUINFO_VSX)
+
+/* optional instructions automatically implemented */
+#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
+#define TCG_TARGET_HAS_ext16u_i32 0
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 have_isa_3_00
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
+#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 1
+#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_extr_i64_i32 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 have_isa_3_00
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 0
+#define TCG_TARGET_HAS_ext16u_i64 0
+#define TCG_TARGET_HAS_ext32u_i64 0
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
+#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#endif
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
+ (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
+
+#define TCG_TARGET_HAS_tst 1
+
+/*
+ * While technically Altivec could support V64, it has no 64-bit store
+ * instruction and substituting two 32-bit stores makes the generated
+ * code quite large.
+ */
+#define TCG_TARGET_HAS_v64 have_vsx
+#define TCG_TARGET_HAS_v128 have_altivec
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec have_isa_2_07
+#define TCG_TARGET_HAS_nand_vec have_isa_2_07
+#define TCG_TARGET_HAS_nor_vec 1
+#define TCG_TARGET_HAS_eqv_vec have_isa_2_07
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec have_isa_3_00
+#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_shi_vec 0
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec have_vsx
+#define TCG_TARGET_HAS_cmpsel_vec 1
+#define TCG_TARGET_HAS_tst_vec 0
+
+#endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#ifndef PPC_TCG_TARGET_H
#define PPC_TCG_TARGET_H

-#include "host/cpuinfo.h"
-
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

#define TCG_TARGET_NB_REGS 64
@@ -XXX,XX +XXX,XX @@ typedef enum {
TCG_AREG0 = TCG_REG_R27
} TCGReg;

-#define have_isa_2_06 (cpuinfo & CPUINFO_V2_06)
-#define have_isa_2_07 (cpuinfo & CPUINFO_V2_07)
-#define have_isa_3_00 (cpuinfo & CPUINFO_V3_0)
-#define have_isa_3_10 (cpuinfo & CPUINFO_V3_1)
-#define have_altivec (cpuinfo & CPUINFO_ALTIVEC)
-#define have_vsx (cpuinfo & CPUINFO_VSX)
-
-/* optional instructions automatically implemented */
-#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
-#define TCG_TARGET_HAS_ext16u_i32 0
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 have_isa_3_00
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
179
-#define TCG_TARGET_HAS_eqv_i32 1
180
-#define TCG_TARGET_HAS_nand_i32 1
181
-#define TCG_TARGET_HAS_nor_i32 1
182
-#define TCG_TARGET_HAS_clz_i32 1
183
-#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
184
-#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
185
-#define TCG_TARGET_HAS_deposit_i32 1
186
-#define TCG_TARGET_HAS_extract_i32 1
187
-#define TCG_TARGET_HAS_sextract_i32 0
188
-#define TCG_TARGET_HAS_extract2_i32 0
189
-#define TCG_TARGET_HAS_negsetcond_i32 1
190
-#define TCG_TARGET_HAS_mulu2_i32 0
191
-#define TCG_TARGET_HAS_muls2_i32 0
192
-#define TCG_TARGET_HAS_muluh_i32 1
193
-#define TCG_TARGET_HAS_mulsh_i32 1
194
-#define TCG_TARGET_HAS_qemu_st8_i32 0
195
-
196
-#if TCG_TARGET_REG_BITS == 64
197
-#define TCG_TARGET_HAS_add2_i32 0
198
-#define TCG_TARGET_HAS_sub2_i32 0
199
-#define TCG_TARGET_HAS_extr_i64_i32 0
200
-#define TCG_TARGET_HAS_div_i64 1
201
-#define TCG_TARGET_HAS_rem_i64 have_isa_3_00
202
-#define TCG_TARGET_HAS_rot_i64 1
203
-#define TCG_TARGET_HAS_ext8s_i64 1
204
-#define TCG_TARGET_HAS_ext16s_i64 1
205
-#define TCG_TARGET_HAS_ext32s_i64 1
206
-#define TCG_TARGET_HAS_ext8u_i64 0
207
-#define TCG_TARGET_HAS_ext16u_i64 0
208
-#define TCG_TARGET_HAS_ext32u_i64 0
209
-#define TCG_TARGET_HAS_bswap16_i64 1
210
-#define TCG_TARGET_HAS_bswap32_i64 1
211
-#define TCG_TARGET_HAS_bswap64_i64 1
212
-#define TCG_TARGET_HAS_not_i64 1
213
-#define TCG_TARGET_HAS_andc_i64 1
214
-#define TCG_TARGET_HAS_orc_i64 1
215
-#define TCG_TARGET_HAS_eqv_i64 1
216
-#define TCG_TARGET_HAS_nand_i64 1
217
-#define TCG_TARGET_HAS_nor_i64 1
218
-#define TCG_TARGET_HAS_clz_i64 1
219
-#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
220
-#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
221
-#define TCG_TARGET_HAS_deposit_i64 1
222
-#define TCG_TARGET_HAS_extract_i64 1
223
-#define TCG_TARGET_HAS_sextract_i64 0
224
-#define TCG_TARGET_HAS_extract2_i64 0
225
-#define TCG_TARGET_HAS_negsetcond_i64 1
226
-#define TCG_TARGET_HAS_add2_i64 1
227
-#define TCG_TARGET_HAS_sub2_i64 1
228
-#define TCG_TARGET_HAS_mulu2_i64 0
229
-#define TCG_TARGET_HAS_muls2_i64 0
230
-#define TCG_TARGET_HAS_muluh_i64 1
231
-#define TCG_TARGET_HAS_mulsh_i64 1
232
-#endif
233
-
234
-#define TCG_TARGET_HAS_qemu_ldst_i128 \
235
- (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
236
-
237
-#define TCG_TARGET_HAS_tst 1
238
-
239
-/*
240
- * While technically Altivec could support V64, it has no 64-bit store
241
- * instruction and substituting two 32-bit stores makes the generated
242
- * code quite large.
243
- */
244
-#define TCG_TARGET_HAS_v64 have_vsx
245
-#define TCG_TARGET_HAS_v128 have_altivec
246
-#define TCG_TARGET_HAS_v256 0
247
-
248
-#define TCG_TARGET_HAS_andc_vec 1
249
-#define TCG_TARGET_HAS_orc_vec have_isa_2_07
250
-#define TCG_TARGET_HAS_nand_vec have_isa_2_07
251
-#define TCG_TARGET_HAS_nor_vec 1
252
-#define TCG_TARGET_HAS_eqv_vec have_isa_2_07
253
-#define TCG_TARGET_HAS_not_vec 1
254
-#define TCG_TARGET_HAS_neg_vec have_isa_3_00
255
-#define TCG_TARGET_HAS_abs_vec 0
256
-#define TCG_TARGET_HAS_roti_vec 0
257
-#define TCG_TARGET_HAS_rots_vec 0
258
-#define TCG_TARGET_HAS_rotv_vec 1
259
-#define TCG_TARGET_HAS_shi_vec 0
260
-#define TCG_TARGET_HAS_shs_vec 0
261
-#define TCG_TARGET_HAS_shv_vec 1
262
-#define TCG_TARGET_HAS_mul_vec 1
263
-#define TCG_TARGET_HAS_sat_vec 1
264
-#define TCG_TARGET_HAS_minmax_vec 1
265
-#define TCG_TARGET_HAS_bitsel_vec have_vsx
266
-#define TCG_TARGET_HAS_cmpsel_vec 1
267
-#define TCG_TARGET_HAS_tst_vec 0
268
+#include "tcg-target-has.h"
269
270
#define TCG_TARGET_DEFAULT_MO (0)
271
272
--
273
2.43.0
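Note that on ppc several of these "macros" are not compile-time constants: have_isa_3_00 and friends read the cpuinfo word probed at startup, so something like TCG_TARGET_HAS_rem_i32 is an ordinary runtime expression. A hedged sketch of the idea in isolation (the bit assignment and probe are illustrative, not the real cpuinfo layout):

    #include <stdbool.h>

    extern unsigned cpuinfo;                /* filled in once at startup */
    #define CPUINFO_V3_0 (1u << 3)          /* illustrative bit assignment */
    #define have_isa_3_00 (cpuinfo & CPUINFO_V3_0)
    #define TCG_TARGET_HAS_rem_i32 have_isa_3_00

    static bool can_emit_rem_i32(void)
    {
        return TCG_TARGET_HAS_rem_i32;      /* evaluated at run time */
    }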
New patch

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-10-philmd@linaro.org>
---
 tcg/riscv/tcg-target-has.h | 112 +++++++++++++++++++++++++++++++++++++
 tcg/riscv/tcg-target.h | 102 +--------------------------------
 2 files changed, 113 insertions(+), 101 deletions(-)
 create mode 100644 tcg/riscv/tcg-target-has.h

diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/riscv/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2018 SiFive, Inc
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#include "host/cpuinfo.h"
+
+/* optional instructions */
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_div2_i32 0
+#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_extract_i32 0
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_orc_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_eqv_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_ctz_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_brcond2 1
+#define TCG_TARGET_HAS_setcond2 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_div2_i64 0
+#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_extract_i64 0
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_extr_i64_i32 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_orc_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_eqv_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_ctz_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+
+#define TCG_TARGET_HAS_tst 0
+
+/* vector instructions */
+#define TCG_TARGET_HAS_v64 (cpuinfo & CPUINFO_ZVE64X)
+#define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_ZVE64X)
+#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_ZVE64X)
+#define TCG_TARGET_HAS_andc_vec 0
+#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 1
+#define TCG_TARGET_HAS_rots_vec 1
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 1
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 1
+
+#define TCG_TARGET_HAS_tst_vec 0
+
+#endif
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 #ifndef RISCV_TCG_TARGET_H
 #define RISCV_TCG_TARGET_H

-#include "host/cpuinfo.h"
-
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_NB_REGS 64
 #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_REG_TMP2 = TCG_REG_T4,
 } TCGReg;

-/* optional instructions */
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_div2_i32 0
-#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_extract_i32 0
-#define TCG_TARGET_HAS_sextract_i32 0
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 0
-#define TCG_TARGET_HAS_muls2_i32 0
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_orc_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_eqv_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_ctz_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_brcond2 1
-#define TCG_TARGET_HAS_setcond2 1
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#define TCG_TARGET_HAS_negsetcond_i64 1
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 1
-#define TCG_TARGET_HAS_div2_i64 0
-#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_extract_i64 0
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_extr_i64_i32 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_andc_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_orc_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_eqv_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_clz_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_ctz_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 0
-#define TCG_TARGET_HAS_muls2_i64 0
-#define TCG_TARGET_HAS_muluh_i64 1
-#define TCG_TARGET_HAS_mulsh_i64 1
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
-
-#define TCG_TARGET_HAS_tst 0
-
-/* vector instructions */
-#define TCG_TARGET_HAS_v64 (cpuinfo & CPUINFO_ZVE64X)
-#define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_ZVE64X)
-#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_ZVE64X)
-#define TCG_TARGET_HAS_andc_vec 0
-#define TCG_TARGET_HAS_orc_vec 0
-#define TCG_TARGET_HAS_nand_vec 0
-#define TCG_TARGET_HAS_nor_vec 0
-#define TCG_TARGET_HAS_eqv_vec 0
-#define TCG_TARGET_HAS_not_vec 1
-#define TCG_TARGET_HAS_neg_vec 1
-#define TCG_TARGET_HAS_abs_vec 0
-#define TCG_TARGET_HAS_roti_vec 1
-#define TCG_TARGET_HAS_rots_vec 1
-#define TCG_TARGET_HAS_rotv_vec 1
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 1
-#define TCG_TARGET_HAS_shv_vec 1
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_sat_vec 1
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 0
-#define TCG_TARGET_HAS_cmpsel_vec 1
-
-#define TCG_TARGET_HAS_tst_vec 0
+#include "tcg-target-has.h"

 #define TCG_TARGET_DEFAULT_MO (0)

--
2.43.0
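On riscv many entries are gated on Zbb in the same way. When such a macro evaluates to 0 (no Zbb), the middle-end must expand the operation; for TCG's ctz, which carries an explicit result for a zero input, a classic branch-free expansion looks roughly like this (an illustrative sketch, not the exact QEMU expansion):

    #include <stdint.h>

    static uint32_t ctz32_fallback(uint32_t x, uint32_t zero_value)
    {
        if (x == 0) {
            return zero_value;   /* TCG's ctz takes a value for the x == 0 case */
        }
        uint32_t n = 0;
        if (!(x & 0x0000ffffu)) { n += 16; x >>= 16; }
        if (!(x & 0x000000ffu)) { n += 8;  x >>= 8; }
        if (!(x & 0x0000000fu)) { n += 4;  x >>= 4; }
        if (!(x & 0x00000003u)) { n += 2;  x >>= 2; }
        if (!(x & 0x00000001u)) { n += 1; }
        return n;
    }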
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
     return false;
 }

+static bool fold_bswap(OptContext *ctx, TCGOp *op)
+{
+    if (arg_is_const(op->args[1])) {
+        uint64_t t = arg_info(op->args[1])->val;
+
+        t = do_constant_folding(op->opc, t, op->args[2]);
+        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+    }
+    return false;
+}
+
 static bool fold_call(OptContext *ctx, TCGOp *op)
 {
     TCGContext *s = ctx->tcg;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         }
         break;

-        CASE_OP_32_64(bswap16):
-        CASE_OP_32_64(bswap32):
-        case INDEX_op_bswap64_i64:
-            if (arg_is_const(op->args[1])) {
-                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
-                                          op->args[2]);
-                tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
-                continue;
-            }
-            break;
-
         default:
             break;

@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         case INDEX_op_brcond2_i32:
             done = fold_brcond2(&ctx, op);
             break;
+        CASE_OP_32_64(bswap16):
+        CASE_OP_32_64(bswap32):
+        case INDEX_op_bswap64_i64:
+            done = fold_bswap(&ctx, op);
+            break;
         CASE_OP_32_64(clz):
         CASE_OP_32_64(ctz):
             done = fold_count_zeros(&ctx, op);
--
2.25.1

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-11-philmd@linaro.org>
---
 tcg/s390x/tcg-target-has.h | 124 +++++++++++++++++++++++++++++++++++++
 tcg/s390x/tcg-target.h | 114 +---------------------------------
 2 files changed, 125 insertions(+), 113 deletions(-)
 create mode 100644 tcg/s390x/tcg-target-has.h

diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/s390x/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+/* Facilities required for proper operation; checked at startup. */
+
+#define FACILITY_ZARCH_ACTIVE 2
+#define FACILITY_LONG_DISP 18
+#define FACILITY_EXT_IMM 21
+#define FACILITY_GEN_INST_EXT 34
+#define FACILITY_45 45
+
+/* Facilities that are checked at runtime. */
+
+#define FACILITY_LOAD_ON_COND2 53
+#define FACILITY_MISC_INSN_EXT2 58
+#define FACILITY_MISC_INSN_EXT3 61
+#define FACILITY_VECTOR 129
+#define FACILITY_VECTOR_ENH1 135
+
+extern uint64_t s390_facilities[3];
+
+#define HAVE_FACILITY(X) \
+    ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div2_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_andc_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_orc_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_extr_i64_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_div2_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_andc_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_orc_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 1
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 HAVE_FACILITY(MISC_INSN_EXT2)
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
+
+#define TCG_TARGET_HAS_tst 1
+
+#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
+#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_nand_vec HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_nor_vec 1
+#define TCG_TARGET_HAS_eqv_vec HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 1
+#define TCG_TARGET_HAS_rots_vec 1
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 1
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 0
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 1
+#define TCG_TARGET_HAS_tst_vec 0
+
+#endif
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum TCGReg {

 #define TCG_TARGET_NB_REGS 64

-/* Facilities required for proper operation; checked at startup. */
-
-#define FACILITY_ZARCH_ACTIVE 2
-#define FACILITY_LONG_DISP 18
-#define FACILITY_EXT_IMM 21
-#define FACILITY_GEN_INST_EXT 34
-#define FACILITY_45 45
-
-/* Facilities that are checked at runtime. */
-
-#define FACILITY_LOAD_ON_COND2 53
-#define FACILITY_MISC_INSN_EXT2 58
-#define FACILITY_MISC_INSN_EXT3 61
-#define FACILITY_VECTOR 129
-#define FACILITY_VECTOR_ENH1 135
-
-extern uint64_t s390_facilities[3];
-
-#define HAVE_FACILITY(X) \
-    ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div2_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_andc_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_orc_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_clz_i32 0
-#define TCG_TARGET_HAS_ctz_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 1
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 0
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 0
-#define TCG_TARGET_HAS_muls2_i32 0
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_extr_i64_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#define TCG_TARGET_HAS_div2_i64 1
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_andc_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_orc_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_clz_i64 1
-#define TCG_TARGET_HAS_ctz_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 1
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_negsetcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 1
-#define TCG_TARGET_HAS_muls2_i64 HAVE_FACILITY(MISC_INSN_EXT2)
-#define TCG_TARGET_HAS_muluh_i64 0
-#define TCG_TARGET_HAS_mulsh_i64 0
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 1
-
-#define TCG_TARGET_HAS_tst 1
-
-#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
-#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
-#define TCG_TARGET_HAS_v256 0
-
-#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
-#define TCG_TARGET_HAS_nand_vec HAVE_FACILITY(VECTOR_ENH1)
-#define TCG_TARGET_HAS_nor_vec 1
-#define TCG_TARGET_HAS_eqv_vec HAVE_FACILITY(VECTOR_ENH1)
-#define TCG_TARGET_HAS_not_vec 1
-#define TCG_TARGET_HAS_neg_vec 1
-#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec 1
-#define TCG_TARGET_HAS_rots_vec 1
-#define TCG_TARGET_HAS_rotv_vec 1
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 1
-#define TCG_TARGET_HAS_shv_vec 1
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_sat_vec 0
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 1
-#define TCG_TARGET_HAS_cmpsel_vec 1
-#define TCG_TARGET_HAS_tst_vec 0
+#include "tcg-target-has.h"

 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)

--
2.43.0
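A note on the HAVE_FACILITY() macro moved above: the s390x STFLE facility list numbers bits from the most significant bit of each 64-bit word, which is why the shift is `63 - FACILITY_##X % 64`. Worked through as a plain function, for FACILITY_VECTOR (bit 129):

    /* Plain-function equivalent of HAVE_FACILITY(), for illustration. */
    #include <stdint.h>

    static uint64_t s390_facilities[3];

    static int have_facility(int nr)
    {
        /* nr = 129: word 129 / 64 = 2, shift 63 - (129 % 64) = 62,
         * i.e. the second-most-significant bit of s390_facilities[2],
         * matching STFLE's MSB-first bit numbering. */
        return (s390_facilities[nr / 64] >> (63 - nr % 64)) & 1;
    }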
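Looking back at the fold_bswap() change above: it evaluates a bswap whose input is already known to be constant and replaces the op with a movi. What that constant folding computes for a 32-bit value is the usual byte reversal (the sample value below is illustrative):

    #include <stdint.h>

    /* What constant-folding a bswap32 computes at translate time. */
    static uint32_t bswap32_const(uint32_t v)
    {
        return ((v & 0x000000ffu) << 24) |
               ((v & 0x0000ff00u) << 8)  |
               ((v & 0x00ff0000u) >> 8)  |
               ((v & 0xff000000u) >> 24);
    }
    /* e.g. bswap32_const(0x12345678) == 0x78563412, so the optimizer can
     * emit "movi dst, 0x78563412" instead of a runtime byte swap. */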
Most of these are handled by creating a fold_const2_commutative
to handle all of the binary operators.  The rest were already
handled on a case-by-case basis in the switch, and have their
own fold function in which to place the call.

We now have only one major switch on TCGOpcode.

Introduce NO_DEST and a block comment for swap_commutative in
order to make the handling of brcond and movcond opcodes cleaner.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 142 ++++++++++++++++++++++++-------------------------
 1 file changed, 70 insertions(+), 72 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
     return -1;
 }

+/**
+ * swap_commutative:
+ * @dest: TCGArg of the destination argument, or NO_DEST.
+ * @p1: first paired argument
+ * @p2: second paired argument
+ *
+ * If *@p1 is a constant and *@p2 is not, swap.
+ * If *@p2 matches @dest, swap.
+ * Return true if a swap was performed.
+ */
+
+#define NO_DEST temp_arg(NULL)
+
 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 {
     TCGArg a1 = *p1, a2 = *p2;
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
     return false;
 }

+static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
+{
+    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
+    return fold_const2(ctx, op);
+}
+
 static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
     uint64_t a_mask = ctx->a_mask;
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)

 static bool fold_add(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_x(ctx, op, 0)) {
         return true;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)

 static bool fold_add2(OptContext *ctx, TCGOp *op)
 {
+    /* Note that the high and low parts may be independently swapped. */
+    swap_commutative(op->args[0], &op->args[2], &op->args[4]);
+    swap_commutative(op->args[1], &op->args[3], &op->args[5]);
+
     return fold_addsub2(ctx, op, true);
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
 {
     uint64_t z1, z2;

-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_i(ctx, op, 0) ||
         fold_xi_to_x(ctx, op, -1) ||
         fold_xx_to_x(ctx, op)) {
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
 static bool fold_brcond(OptContext *ctx, TCGOp *op)
 {
     TCGCond cond = op->args[2];
-    int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
+    int i;

+    if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
+        op->args[2] = cond = tcg_swap_cond(cond);
+    }
+
+    i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
     if (i == 0) {
         tcg_op_remove(ctx->tcg, op);
         return true;
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
 {
     TCGCond cond = op->args[4];
-    int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
     TCGArg label = op->args[5];
-    int inv = 0;
+    int i, inv = 0;

+    if (swap_commutative2(&op->args[0], &op->args[2])) {
+        op->args[4] = cond = tcg_swap_cond(cond);
+    }
+
+    i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
     if (i >= 0) {
         goto do_brcond_const;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)

 static bool fold_eqv(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_x(ctx, op, -1) ||
         fold_xi_to_not(ctx, op, 0)) {
         return true;
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
 static bool fold_movcond(OptContext *ctx, TCGOp *op)
 {
     TCGCond cond = op->args[5];
-    int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+    int i;

+    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+        op->args[5] = cond = tcg_swap_cond(cond);
+    }
+    /*
+     * Canonicalize the "false" input reg to match the destination reg so
+     * that the tcg backend can implement a "move if true" operation.
+     */
+    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
+        op->args[5] = cond = tcg_invert_cond(cond);
+    }
+
+    i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
     if (i >= 0) {
         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)

 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_i(ctx, op, 0)) {
         return true;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)

 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
 {
+    swap_commutative(op->args[0], &op->args[2], &op->args[3]);
+
     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
         uint64_t a = arg_info(op->args[2])->val;
         uint64_t b = arg_info(op->args[3])->val;
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)

 static bool fold_nand(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_not(ctx, op, -1)) {
         return true;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)

 static bool fold_nor(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_not(ctx, op, 0)) {
         return true;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)

 static bool fold_or(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_x(ctx, op, 0) ||
         fold_xx_to_x(ctx, op)) {
         return true;
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
 static bool fold_setcond(OptContext *ctx, TCGOp *op)
 {
     TCGCond cond = op->args[3];
-    int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+    int i;

+    if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
+        op->args[3] = cond = tcg_swap_cond(cond);
+    }
+
+    i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
     if (i >= 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
 {
     TCGCond cond = op->args[5];
-    int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
-    int inv = 0;
+    int i, inv = 0;

+    if (swap_commutative2(&op->args[1], &op->args[3])) {
+        op->args[5] = cond = tcg_swap_cond(cond);
+    }
+
+    i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
     if (i >= 0) {
         goto do_setcond_const;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)

 static bool fold_xor(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xx_to_i(ctx, op, 0) ||
         fold_xi_to_x(ctx, op, 0) ||
         fold_xi_to_not(ctx, op, -1)) {
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
             ctx.type = TCG_TYPE_I32;
         }

-        /* For commutative operations make constant second argument */
-        switch (opc) {
-        CASE_OP_32_64_VEC(add):
-        CASE_OP_32_64_VEC(mul):
-        CASE_OP_32_64_VEC(and):
-        CASE_OP_32_64_VEC(or):
-        CASE_OP_32_64_VEC(xor):
-        CASE_OP_32_64(eqv):
-        CASE_OP_32_64(nand):
-        CASE_OP_32_64(nor):
-        CASE_OP_32_64(muluh):
-        CASE_OP_32_64(mulsh):
-            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
-            break;
-        CASE_OP_32_64(brcond):
-            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
-                op->args[2] = tcg_swap_cond(op->args[2]);
-            }
-            break;
-        CASE_OP_32_64(setcond):
-            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
-                op->args[3] = tcg_swap_cond(op->args[3]);
-            }
-            break;
-        CASE_OP_32_64(movcond):
-            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
-                op->args[5] = tcg_swap_cond(op->args[5]);
-            }
-            /* For movcond, we canonicalize the "false" input reg to match
-               the destination reg so that the tcg backend can implement
-               a "move if true" operation. */
-            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
-                op->args[5] = tcg_invert_cond(op->args[5]);
-            }
-            break;
-        CASE_OP_32_64(add2):
-            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
-            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
-            break;
-        CASE_OP_32_64(mulu2):
-        CASE_OP_32_64(muls2):
-            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
-            break;
-        case INDEX_op_brcond2_i32:
-            if (swap_commutative2(&op->args[0], &op->args[2])) {
-                op->args[4] = tcg_swap_cond(op->args[4]);
-            }
-            break;
-        case INDEX_op_setcond2_i32:
-            if (swap_commutative2(&op->args[1], &op->args[3])) {
-                op->args[5] = tcg_swap_cond(op->args[5]);
-            }
-            break;
-        default:
-            break;
-        }
-
         /* Assume all bits affected, and no bits known zero. */
         ctx.a_mask = -1;
         ctx.z_mask = -1;
--
2.25.1

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-12-philmd@linaro.org>
---
 tcg/sparc64/tcg-target-has.h | 86 ++++++++++++++++++++++++++++++++++++
 tcg/sparc64/tcg-target.h | 78 +-------------------------------
 2 files changed, 88 insertions(+), 76 deletions(-)
 create mode 100644 tcg/sparc64/tcg-target-has.h

diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/sparc64/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#if defined(__VIS__) && __VIS__ >= 0x300
+#define use_vis3_instructions 1
+#else
+extern bool use_vis3_instructions;
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_rot_i32 0
+#define TCG_TARGET_HAS_ext8s_i32 0
+#define TCG_TARGET_HAS_ext16s_i32 0
+#define TCG_TARGET_HAS_ext8u_i32 0
+#define TCG_TARGET_HAS_ext16u_i32 0
+#define TCG_TARGET_HAS_bswap16_i32 0
+#define TCG_TARGET_HAS_bswap32_i32 0
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_extract_i32 0
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_extr_i64_i32 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 0
+#define TCG_TARGET_HAS_rot_i64 0
+#define TCG_TARGET_HAS_ext8s_i64 0
+#define TCG_TARGET_HAS_ext16s_i64 0
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 0
+#define TCG_TARGET_HAS_ext16u_i64 0
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 0
+#define TCG_TARGET_HAS_bswap32_i64 0
+#define TCG_TARGET_HAS_bswap64_i64 0
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 0
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_extract_i64 0
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
+#define TCG_TARGET_HAS_mulsh_i64 0
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+
+#define TCG_TARGET_HAS_tst 1
+
+#endif
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_REG_I7,
 } TCGReg;

-#if defined(__VIS__) && __VIS__ >= 0x300
-#define use_vis3_instructions 1
-#else
-extern bool use_vis3_instructions;
-#endif
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 0
-#define TCG_TARGET_HAS_rot_i32 0
-#define TCG_TARGET_HAS_ext8s_i32 0
-#define TCG_TARGET_HAS_ext16s_i32 0
-#define TCG_TARGET_HAS_ext8u_i32 0
-#define TCG_TARGET_HAS_ext16u_i32 0
-#define TCG_TARGET_HAS_bswap16_i32 0
-#define TCG_TARGET_HAS_bswap32_i32 0
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 0
-#define TCG_TARGET_HAS_ctz_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_extract_i32 0
-#define TCG_TARGET_HAS_sextract_i32 0
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 1
-#define TCG_TARGET_HAS_muls2_i32 1
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#define TCG_TARGET_HAS_extr_i64_i32 0
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 0
-#define TCG_TARGET_HAS_rot_i64 0
-#define TCG_TARGET_HAS_ext8s_i64 0
-#define TCG_TARGET_HAS_ext16s_i64 0
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 0
-#define TCG_TARGET_HAS_ext16u_i64 0
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 0
-#define TCG_TARGET_HAS_bswap32_i64 0
-#define TCG_TARGET_HAS_bswap64_i64 0
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_andc_i64 1
-#define TCG_TARGET_HAS_orc_i64 1
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_clz_i64 0
-#define TCG_TARGET_HAS_ctz_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 0
-#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_extract_i64 0
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_negsetcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 0
-#define TCG_TARGET_HAS_muls2_i64 0
-#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
-#define TCG_TARGET_HAS_mulsh_i64 0
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
-
-#define TCG_TARGET_HAS_tst 1
-
 #define TCG_AREG0 TCG_REG_I0

+#include "tcg-target-has.h"
+
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif
--
2.43.0
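The heart of the optimize.c patch above is the canonicalization rule documented on swap_commutative(): put a constant operand second, and for comparisons also swap the condition. A stripped-down sketch of that rule (types simplified; the real code also orders temporaries and handles the NO_DEST case):

    typedef struct { int is_const; long val; } ArgSketch;

    /* Returns 1 when the operands were swapped; for comparison ops the
     * caller must then also swap (or invert) the condition code. */
    static int swap_commutative_sketch(ArgSketch *p1, ArgSketch *p2)
    {
        if (p1->is_const && !p2->is_const) {
            ArgSketch tmp = *p1;
            *p1 = *p2;
            *p2 = tmp;
            return 1;
        }
        return 0;
    }

With every commutative op canonicalized this way, the fold functions only need to look for a constant in the second slot.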
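One detail of the sparc64 header above is worth calling out: use_vis3_instructions is resolved at compile time when the compiler itself targets VIS3, and only becomes a runtime-probed variable otherwise. The pattern in isolation (the probe that sets the variable is elsewhere and not shown):

    #include <stdbool.h>

    #if defined(__VIS__) && __VIS__ >= 0x300
    /* The whole binary already requires VIS3, so the test is free. */
    #define use_vis3_instructions 1
    #else
    /* Otherwise a startup probe sets this once, before code generation. */
    extern bool use_vis3_instructions;
    #endif

so `TCG_TARGET_HAS_muluh_i64 use_vis3_instructions` costs nothing on a VIS3 build and a single load elsewhere.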
From: Luis Pires <luis.pires@eldorado.org.br>

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-5-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/unit/test-div128.c | 197 +++++++++++++++++++++++++++++++++++++++
 tests/unit/meson.build | 1 +
 2 files changed, 198 insertions(+)
 create mode 100644 tests/unit/test-div128.c

diff --git a/tests/unit/test-div128.c b/tests/unit/test-div128.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/unit/test-div128.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * Test 128-bit division functions
+ *
+ * Copyright (c) 2021 Instituto de Pesquisas Eldorado (eldorado.org.br)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+
+typedef struct {
+    uint64_t high;
+    uint64_t low;
+    uint64_t rhigh;
+    uint64_t rlow;
+    uint64_t divisor;
+    uint64_t remainder;
+} test_data_unsigned;
+
+typedef struct {
+    int64_t high;
+    uint64_t low;
+    int64_t rhigh;
+    uint64_t rlow;
+    int64_t divisor;
+    int64_t remainder;
+} test_data_signed;
+
+static const test_data_unsigned test_table_unsigned[] = {
+    /* Dividend fits in 64 bits */
+    { 0x0000000000000000ULL, 0x0000000000000000ULL,
+      0x0000000000000000ULL, 0x0000000000000000ULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL},
+    { 0x0000000000000000ULL, 0x0000000000000001ULL,
+      0x0000000000000000ULL, 0x0000000000000001ULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL},
+    { 0x0000000000000000ULL, 0x0000000000000003ULL,
+      0x0000000000000000ULL, 0x0000000000000001ULL,
+      0x0000000000000002ULL, 0x0000000000000001ULL},
+    { 0x0000000000000000ULL, 0x8000000000000000ULL,
+      0x0000000000000000ULL, 0x8000000000000000ULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL},
+    { 0x0000000000000000ULL, 0xa000000000000000ULL,
+      0x0000000000000000ULL, 0x0000000000000002ULL,
+      0x4000000000000000ULL, 0x2000000000000000ULL},
+    { 0x0000000000000000ULL, 0x8000000000000000ULL,
+      0x0000000000000000ULL, 0x0000000000000001ULL,
+      0x8000000000000000ULL, 0x0000000000000000ULL},
+
+    /* Dividend > 64 bits, with MSB 0 */
+    { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
+      0x123456789abcdefeULL, 0xefedcba987654321ULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL},
+    { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
+      0x0000000000000001ULL, 0x000000000000000dULL,
+      0x123456789abcdefeULL, 0x03456789abcdf03bULL},
+    { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
+      0x0123456789abcdefULL, 0xeefedcba98765432ULL,
+      0x0000000000000010ULL, 0x0000000000000001ULL},
+
+    /* Dividend > 64 bits, with MSB 1 */
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL},
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL,
+      0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x0feeddccbbaa9988ULL, 0x7766554433221100ULL,
+      0x0000000000000010ULL, 0x000000000000000fULL},
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x000000000000000eULL, 0x00f0f0f0f0f0f35aULL,
+      0x123456789abcdefeULL, 0x0f8922bc55ef90c3ULL},
+
+    /**
+     * Divisor == 64 bits, with MSB 1
+     * and high 64 bits of dividend >= divisor
+     * (for testing normalization)
+     */
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL,
+      0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x0000000000000001ULL, 0xfddbb9977553310aULL,
+      0x8000000000000001ULL, 0x78899aabbccddf05ULL},
+
+    /* Dividend > 64 bits, divisor almost as big */
+    { 0x0000000000000001ULL, 0x23456789abcdef01ULL,
+      0x0000000000000000ULL, 0x000000000000000fULL,
+      0x123456789abcdefeULL, 0x123456789abcde1fULL},
+};
+
+static const test_data_signed test_table_signed[] = {
+    /* Positive dividend, positive/negative divisors */
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0x0000000000000000LL, 0x0000000000bc614eULL,
+      0x0000000000000001LL, 0x0000000000000000LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0xffffffffffffffffLL, 0x0000000000000000LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0x0000000000000000LL, 0x00000000005e30a7ULL,
+      0x0000000000000002LL, 0x0000000000000000LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
+      0xfffffffffffffffeLL, 0x0000000000000000LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0x0000000000000000LL, 0x0000000000178c29ULL,
+      0x0000000000000008LL, 0x0000000000000006LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
+      0xfffffffffffffff8LL, 0x0000000000000006LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0x0000000000000000LL, 0x000000000000550dULL,
+      0x0000000000000237LL, 0x0000000000000183LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
+      0xfffffffffffffdc9LL, 0x0000000000000183LL},
+
+    /* Negative dividend, positive/negative divisors */
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0x0000000000000001LL, 0x0000000000000000LL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0x0000000000000000LL, 0x0000000000bc614eULL,
+      0xffffffffffffffffLL, 0x0000000000000000LL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
+      0x0000000000000002LL, 0x0000000000000000LL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0x0000000000000000LL, 0x00000000005e30a7ULL,
+      0xfffffffffffffffeLL, 0x0000000000000000LL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
+      0x0000000000000008LL, 0xfffffffffffffffaLL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0x0000000000000000LL, 0x0000000000178c29ULL,
+      0xfffffffffffffff8LL, 0xfffffffffffffffaLL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
+      0x0000000000000237LL, 0xfffffffffffffe7dLL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0x0000000000000000LL, 0x000000000000550dULL,
+      0xfffffffffffffdc9LL, 0xfffffffffffffe7dLL},
+};
+
+static void test_divu128(void)
+{
+    int i;
+    uint64_t rem;
+    test_data_unsigned tmp;
+
+    for (i = 0; i < ARRAY_SIZE(test_table_unsigned); ++i) {
+        tmp = test_table_unsigned[i];
+
+        rem = divu128(&tmp.low, &tmp.high, tmp.divisor);
+        g_assert_cmpuint(tmp.low, ==, tmp.rlow);
+        g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
+        g_assert_cmpuint(rem, ==, tmp.remainder);
+    }
+}
+
+static void test_divs128(void)
+{
+    int i;
+    int64_t rem;
+    test_data_signed tmp;
+
+    for (i = 0; i < ARRAY_SIZE(test_table_signed); ++i) {
+        tmp = test_table_signed[i];
+
+        rem = divs128(&tmp.low, &tmp.high, tmp.divisor);
+        g_assert_cmpuint(tmp.low, ==, tmp.rlow);
+        g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
+        g_assert_cmpuint(rem, ==, tmp.remainder);
+    }
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    g_test_add_func("/host-utils/test_divu128", test_divu128);
+    g_test_add_func("/host-utils/test_divs128", test_divs128);
+    return g_test_run();
+}
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -XXX,XX +XXX,XX @@ tests = {
   # all code tested by test-x86-cpuid is inside topology.h
   'test-x86-cpuid': [],
   'test-cutils': [],
+  'test-div128': [],
   'test-shift128': [],
   'test-mul64': [],
   # all code tested by test-int128 is inside int128.h

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-13-philmd@linaro.org>
---
 tcg/tci/tcg-target-has.h | 83 ++++++++++++++++++++++++++++++++++++++++
 tcg/tci/tcg-target.h | 75 +-----------------------------
 2 files changed, 84 insertions(+), 74 deletions(-)
 create mode 100644 tcg/tci/tcg-target-has.h

diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/tci/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2009, 2011 Stefan Weil
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_negsetcond_i32 0
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_extr_i64_i32 0
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 1
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_negsetcond_i64 0
+#define TCG_TARGET_HAS_muls2_i64 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+#else
+#define TCG_TARGET_HAS_mulu2_i32 1
+#endif /* TCG_TARGET_REG_BITS == 64 */
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+
+#define TCG_TARGET_HAS_tst 1
+
+#endif
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

-/* Optional instructions. */
-
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_eqv_i32 1
-#define TCG_TARGET_HAS_nand_i32 1
-#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 0
-#define TCG_TARGET_HAS_muls2_i32 1
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_extr_i64_i32 0
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
142
-#define TCG_TARGET_HAS_deposit_i64 1
143
-#define TCG_TARGET_HAS_extract_i64 1
144
-#define TCG_TARGET_HAS_sextract_i64 1
145
-#define TCG_TARGET_HAS_extract2_i64 0
146
-#define TCG_TARGET_HAS_div_i64 1
147
-#define TCG_TARGET_HAS_rem_i64 1
148
-#define TCG_TARGET_HAS_ext8s_i64 1
149
-#define TCG_TARGET_HAS_ext16s_i64 1
150
-#define TCG_TARGET_HAS_ext32s_i64 1
151
-#define TCG_TARGET_HAS_ext8u_i64 1
152
-#define TCG_TARGET_HAS_ext16u_i64 1
153
-#define TCG_TARGET_HAS_ext32u_i64 1
154
-#define TCG_TARGET_HAS_andc_i64 1
155
-#define TCG_TARGET_HAS_eqv_i64 1
156
-#define TCG_TARGET_HAS_nand_i64 1
157
-#define TCG_TARGET_HAS_nor_i64 1
158
-#define TCG_TARGET_HAS_clz_i64 1
159
-#define TCG_TARGET_HAS_ctz_i64 1
160
-#define TCG_TARGET_HAS_ctpop_i64 1
161
-#define TCG_TARGET_HAS_not_i64 1
162
-#define TCG_TARGET_HAS_orc_i64 1
163
-#define TCG_TARGET_HAS_rot_i64 1
164
-#define TCG_TARGET_HAS_negsetcond_i64 0
165
-#define TCG_TARGET_HAS_muls2_i64 1
166
-#define TCG_TARGET_HAS_add2_i32 1
167
-#define TCG_TARGET_HAS_sub2_i32 1
168
-#define TCG_TARGET_HAS_mulu2_i32 1
169
-#define TCG_TARGET_HAS_add2_i64 1
170
-#define TCG_TARGET_HAS_sub2_i64 1
171
-#define TCG_TARGET_HAS_mulu2_i64 1
172
-#define TCG_TARGET_HAS_muluh_i64 0
173
-#define TCG_TARGET_HAS_mulsh_i64 0
174
-#else
175
-#define TCG_TARGET_HAS_mulu2_i32 1
176
-#endif /* TCG_TARGET_REG_BITS == 64 */
177
-
178
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
179
-
180
-#define TCG_TARGET_HAS_tst 1
181
+#include "tcg-target-has.h"
182
183
/* Number of registers available. */
184
#define TCG_TARGET_NB_REGS 16
228
--
185
--
229
2.25.1
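
Aside (not part of the patch): the contract these tests exercise, assuming the in-place divu128() interface from "qemu/host-utils.h" added earlier in this series. The quotient replaces the dividend halves and the remainder is returned:

    /* Sketch only: divide the 128-bit value high:low by 2 in place. */
    uint64_t low = 0x0000000000bc614eULL;   /* dividend = 12345678 */
    uint64_t high = 0x0000000000000000ULL;
    uint64_t rem = divu128(&low, &high, 2);
    /* now low == 0x5e30a7 (6172839), high == 0, rem == 0 */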
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

-/* Optional instructions. */
-
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_eqv_i32 1
-#define TCG_TARGET_HAS_nand_i32 1
-#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 0
-#define TCG_TARGET_HAS_muls2_i32 1
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_extr_i64_i32 0
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_andc_i64 1
-#define TCG_TARGET_HAS_eqv_i64 1
-#define TCG_TARGET_HAS_nand_i64 1
-#define TCG_TARGET_HAS_nor_i64 1
-#define TCG_TARGET_HAS_clz_i64 1
-#define TCG_TARGET_HAS_ctz_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 1
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_orc_i64 1
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_negsetcond_i64 0
-#define TCG_TARGET_HAS_muls2_i64 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 1
-#define TCG_TARGET_HAS_muluh_i64 0
-#define TCG_TARGET_HAS_mulsh_i64 0
-#else
-#define TCG_TARGET_HAS_mulu2_i32 1
-#endif /* TCG_TARGET_REG_BITS == 64 */
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
-
-#define TCG_TARGET_HAS_tst 1
+#include "tcg-target-has.h"

 /* Number of registers available. */
 #define TCG_TARGET_NB_REGS 16
--
2.43.0
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 32 ++++++++++++++++++--------------
1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
 return true;
 }

+static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[1])) {
+ uint64_t t = arg_info(op->args[1])->val;
+
+ if (t != 0) {
+ t = do_constant_folding(op->opc, t, 0);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ }
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
+ }
+ return false;
+}
+
 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
 {
 return fold_const1(ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 }
 break;

- CASE_OP_32_64(clz):
- CASE_OP_32_64(ctz):
- if (arg_is_const(op->args[1])) {
- TCGArg v = arg_info(op->args[1])->val;
- if (v != 0) {
- tmp = do_constant_folding(opc, v, 0);
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
- } else {
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
- }
- continue;
- }
- break;
-
 default:
 break;

@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 case INDEX_op_brcond2_i32:
 done = fold_brcond2(&ctx, op);
 break;
+ CASE_OP_32_64(clz):
+ CASE_OP_32_64(ctz):
+ done = fold_count_zeros(&ctx, op);
+ break;
 CASE_OP_32_64(ctpop):
 done = fold_ctpop(&ctx, op);
 break;
--
2.25.1
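
Aside: the identity that fold_count_zeros() folds, sketched outside TCG. The only assumption is the TCG definition of clz/ctz, where a zero input yields the value of the second operand:

    /* Sketch only: count leading zeros with a defined result for 0. */
    static uint64_t clz64_example(uint64_t x, uint64_t zero_val)
    {
        return x ? (uint64_t)__builtin_clzll(x) : zero_val;
    }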
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-14-philmd@linaro.org>
---
tcg/aarch64/tcg-target.h | 2 --
tcg/arm/tcg-target.h | 2 --
tcg/i386/tcg-target.h | 2 --
tcg/loongarch64/tcg-target.h | 2 --
tcg/mips/tcg-target.h | 2 --
tcg/ppc/tcg-target.h | 2 --
tcg/riscv/tcg-target.h | 2 --
tcg/s390x/tcg-target.h | 2 --
tcg/sparc64/tcg-target.h | 2 --
tcg/tcg-has.h | 2 ++
tcg/tci/tcg-target.h | 2 --
11 files changed, 2 insertions(+), 20 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_TARGET_NB_REGS 64

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_TARGET_NB_REGS 32

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_REG_CALL_STACK = TCG_REG_ESP
 } TCGReg;

-#include "tcg-target-has.h"
-
 /* This defines the natural memory order supported by this
 * architecture before guarantees made by various barrier
 * instructions.
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_VEC_TMP0 = TCG_REG_V23,
 } TCGReg;

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif /* LOONGARCH_TCG_TARGET_H */
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_AREG0 = TCG_REG_S8,
 } TCGReg;

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO 0

 #endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_AREG0 = TCG_REG_R27
 } TCGReg;

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_REG_TMP2 = TCG_REG_T4,
 } TCGReg;

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum TCGReg {

 #define TCG_TARGET_NB_REGS 64

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)

 #endif
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_AREG0 TCG_REG_I0

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-has.h
+++ b/tcg/tcg-has.h
@@ -XXX,XX +XXX,XX @@
 #ifndef TCG_HAS_H
 #define TCG_HAS_H

+#include "tcg-target-has.h"
+
 #if TCG_TARGET_REG_BITS == 32
 /* Turn some undef macros into false macros. */
 #define TCG_TARGET_HAS_extr_i64_i32 0
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

-#include "tcg-target-has.h"
-
 /* Number of registers available. */
 #define TCG_TARGET_NB_REGS 16
--
2.43.0
Rename to fold_addsub2.
Use Int128 to implement the wider operation.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 65 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@
 */

 #include "qemu/osdep.h"
+#include "qemu/int128.h"
 #include "tcg/tcg-op.h"
 #include "tcg-internal.h"

@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
 return false;
 }

-static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 {
 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
 arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
- uint32_t al = arg_info(op->args[2])->val;
- uint32_t ah = arg_info(op->args[3])->val;
- uint32_t bl = arg_info(op->args[4])->val;
- uint32_t bh = arg_info(op->args[5])->val;
- uint64_t a = ((uint64_t)ah << 32) | al;
- uint64_t b = ((uint64_t)bh << 32) | bl;
+ uint64_t al = arg_info(op->args[2])->val;
+ uint64_t ah = arg_info(op->args[3])->val;
+ uint64_t bl = arg_info(op->args[4])->val;
+ uint64_t bh = arg_info(op->args[5])->val;
 TCGArg rl, rh;
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
+ TCGOp *op2;

- if (add) {
- a += b;
+ if (ctx->type == TCG_TYPE_I32) {
+ uint64_t a = deposit64(al, 32, 32, ah);
+ uint64_t b = deposit64(bl, 32, 32, bh);
+
+ if (add) {
+ a += b;
+ } else {
+ a -= b;
+ }
+
+ al = sextract64(a, 0, 32);
+ ah = sextract64(a, 32, 32);
 } else {
- a -= b;
+ Int128 a = int128_make128(al, ah);
+ Int128 b = int128_make128(bl, bh);
+
+ if (add) {
+ a = int128_add(a, b);
+ } else {
+ a = int128_sub(a, b);
+ }
+
+ al = int128_getlo(a);
+ ah = int128_gethi(a);
 }

 rl = op->args[0];
 rh = op->args[1];
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
+
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
+
+ tcg_opt_gen_movi(ctx, op, rl, al);
+ tcg_opt_gen_movi(ctx, op2, rh, ah);
 return true;
 }
 return false;
 }

-static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
+static bool fold_add2(OptContext *ctx, TCGOp *op)
 {
- return fold_addsub2_i32(ctx, op, true);
+ return fold_addsub2(ctx, op, true);
 }

 static bool fold_and(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
 return false;
 }

-static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
 {
- return fold_addsub2_i32(ctx, op, false);
+ return fold_addsub2(ctx, op, false);
 }

 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 CASE_OP_32_64_VEC(add):
 done = fold_add(&ctx, op);
 break;
- case INDEX_op_add2_i32:
- done = fold_add2_i32(&ctx, op);
+ CASE_OP_32_64(add2):
+ done = fold_add2(&ctx, op);
 break;
 CASE_OP_32_64_VEC(and):
 done = fold_and(&ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 CASE_OP_32_64_VEC(sub):
 done = fold_sub(&ctx, op);
 break;
- case INDEX_op_sub2_i32:
- done = fold_sub2_i32(&ctx, op);
+ CASE_OP_32_64(sub2):
+ done = fold_sub2(&ctx, op);
 break;
 CASE_OP_32_64_VEC(xor):
 done = fold_xor(&ctx, op);
--
2.25.1
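
Aside: the arithmetic being folded above, sketched with the compiler's unsigned __int128 standing in for QEMU's Int128 helpers (a host-dependence the patch avoids by using int128_add/int128_sub):

    /* Sketch only: constant-fold add2 on two 64-bit halves per operand. */
    static void fold_add2_example(uint64_t al, uint64_t ah,
                                  uint64_t bl, uint64_t bh,
                                  uint64_t *rl, uint64_t *rh)
    {
        unsigned __int128 a = ((unsigned __int128)ah << 64) | al;
        unsigned __int128 b = ((unsigned __int128)bh << 64) | bl;

        a += b;                     /* int128_add() in the patch */
        *rl = (uint64_t)a;          /* int128_getlo() */
        *rh = (uint64_t)(a >> 64);  /* int128_gethi() */
    }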
TCG_TARGET_HAS_* definitions don't need to be exposed
by "tcg/tcg.h". Only include 'tcg-has.h' when necessary.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-15-philmd@linaro.org>
---
include/tcg/tcg.h | 2 --
tcg/optimize.c | 1 +
tcg/tcg-common.c | 1 +
tcg/tcg-op-gvec.c | 1 +
tcg/tcg-op-ldst.c | 2 +-
tcg/tcg-op-vec.c | 1 +
tcg/tcg-op.c | 2 +-
tcg/tcg.c | 1 +
tcg/tci.c | 1 +
9 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
 #error unsupported
 #endif

-#include "tcg/tcg-has.h"
-
 typedef enum TCGOpcode {
 #define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
 #include "tcg/tcg-opc.h"
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/interval-tree.h"
 #include "tcg/tcg-op-common.h"
 #include "tcg-internal.h"
+#include "tcg-has.h"

 #define CASE_OP_32_64(x) \
 glue(glue(case INDEX_op_, x), _i32): \
diff --git a/tcg/tcg-common.c b/tcg/tcg-common.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-common.c
+++ b/tcg/tcg-common.c
@@ -XXX,XX +XXX,XX @@

 #include "qemu/osdep.h"
 #include "tcg/tcg.h"
+#include "tcg-has.h"

 TCGOpDef tcg_op_defs[] = {
 #define DEF(s, oargs, iargs, cargs, flags) \
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg/tcg-op-common.h"
 #include "tcg/tcg-op-gvec-common.h"
 #include "tcg/tcg-gvec-desc.h"
+#include "tcg-has.h"

 #define MAX_UNROLL 4

diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -XXX,XX +XXX,XX @@
 #include "exec/translation-block.h"
 #include "exec/plugin-gen.h"
 #include "tcg-internal.h"
-
+#include "tcg-has.h"

 static void check_max_alignment(unsigned a_bits)
 {
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg/tcg-op-common.h"
 #include "tcg/tcg-mo.h"
 #include "tcg-internal.h"
+#include "tcg-has.h"

 /*
 * Vector optional opcode tracking.
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@
 #include "exec/translation-block.h"
 #include "exec/plugin-gen.h"
 #include "tcg-internal.h"
-
+#include "tcg-has.h"

 /*
 * Encourage the compiler to tail-call to a function, rather than inlining.
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg/tcg-temp-internal.h"
 #include "tcg-internal.h"
 #include "tcg/perf.h"
+#include "tcg-has.h"
 #ifdef CONFIG_USER_ONLY
 #include "user/guest-base.h"
 #endif
diff --git a/tcg/tci.c b/tcg/tci.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg/helper-info.h"
 #include "tcg/tcg-ldst.h"
 #include "disas/dis-asm.h"
+#include "tcg-has.h"
 #include <ffi.h>

--
2.43.0
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
 return fold_const1(ctx, op);
 }

+static bool fold_deposit(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
+ uint64_t t1 = arg_info(op->args[1])->val;
+ uint64_t t2 = arg_info(op->args[2])->val;
+
+ t1 = deposit64(t1, op->args[3], op->args[4], t2);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
+ }
+ return false;
+}
+
 static bool fold_divide(OptContext *ctx, TCGOp *op)
 {
 return fold_const2(ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 }
 break;

- CASE_OP_32_64(deposit):
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- tmp = deposit64(arg_info(op->args[1])->val,
- op->args[3], op->args[4],
- arg_info(op->args[2])->val);
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
- continue;
- }
- break;
-
 default:
 break;

@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 CASE_OP_32_64(ctpop):
 done = fold_ctpop(&ctx, op);
 break;
+ CASE_OP_32_64(deposit):
+ done = fold_deposit(&ctx, op);
+ break;
 CASE_OP_32_64(div):
 CASE_OP_32_64(divu):
 done = fold_divide(&ctx, op);
--
2.25.1
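
Aside: for readers unfamiliar with deposit64(), it inserts the low LEN bits of the second value into the first at position POS. A behavioral sketch, assuming 0 < len and pos + len <= 64 as QEMU's bitops.h requires:

    /* Sketch only: behavior of deposit64(t1, pos, len, t2). */
    static uint64_t deposit64_example(uint64_t t1, int pos, int len,
                                      uint64_t t2)
    {
        uint64_t mask = (~0ULL >> (64 - len)) << pos;
        return (t1 & ~mask) | ((t2 << pos) & mask);
    }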
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/internal-target.h | 1 +
tcg/aarch64/tcg-target-mo.h | 12 ++++++++++++
tcg/aarch64/tcg-target.h | 2 --
tcg/arm/tcg-target-mo.h | 13 +++++++++++++
tcg/arm/tcg-target.h | 2 --
tcg/i386/tcg-target-mo.h | 19 +++++++++++++++++++
tcg/i386/tcg-target.h | 11 -----------
tcg/loongarch64/tcg-target-mo.h | 12 ++++++++++++
tcg/loongarch64/tcg-target.h | 2 --
tcg/mips/tcg-target-mo.h | 13 +++++++++++++
tcg/mips/tcg-target.h | 2 --
tcg/ppc/tcg-target-mo.h | 12 ++++++++++++
tcg/ppc/tcg-target.h | 2 --
tcg/riscv/tcg-target-mo.h | 12 ++++++++++++
tcg/riscv/tcg-target.h | 2 --
tcg/s390x/tcg-target-mo.h | 12 ++++++++++++
tcg/s390x/tcg-target.h | 2 --
tcg/sparc64/tcg-target-mo.h | 12 ++++++++++++
tcg/sparc64/tcg-target.h | 2 --
tcg/tci/tcg-target-mo.h | 17 +++++++++++++++++
tcg/tci/tcg-target.h | 5 -----
tcg/tcg-op-ldst.c | 1 +
22 files changed, 136 insertions(+), 32 deletions(-)
create mode 100644 tcg/aarch64/tcg-target-mo.h
create mode 100644 tcg/arm/tcg-target-mo.h
create mode 100644 tcg/i386/tcg-target-mo.h
create mode 100644 tcg/loongarch64/tcg-target-mo.h
create mode 100644 tcg/mips/tcg-target-mo.h
create mode 100644 tcg/ppc/tcg-target-mo.h
create mode 100644 tcg/riscv/tcg-target-mo.h
create mode 100644 tcg/s390x/tcg-target-mo.h
create mode 100644 tcg/sparc64/tcg-target-mo.h
create mode 100644 tcg/tci/tcg-target-mo.h

diff --git a/accel/tcg/internal-target.h b/accel/tcg/internal-target.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/internal-target.h
+++ b/accel/tcg/internal-target.h
@@ -XXX,XX +XXX,XX @@
 #include "exec/exec-all.h"
 #include "exec/translation-block.h"
 #include "tb-internal.h"
+#include "tcg-target-mo.h"

 /*
 * Access to the various translations structures need to be serialised
diff --git a/tcg/aarch64/tcg-target-mo.h b/tcg/aarch64/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/aarch64/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_TARGET_NB_REGS 64

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target-mo.h b/tcg/arm/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/arm/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2008 Fabrice Bellard
+ * Copyright (c) 2008 Andrzej Zaborowski
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_TARGET_NB_REGS 32

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif
diff --git a/tcg/i386/tcg-target-mo.h b/tcg/i386/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/i386/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+/*
+ * This defines the natural memory order supported by this architecture
+ * before guarantees made by various barrier instructions.
+ *
+ * The x86 has a pretty strong memory ordering which only really
+ * allows for some stores to be re-ordered after loads.
+ */
+#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+
+#endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_REG_CALL_STACK = TCG_REG_ESP
 } TCGReg;

-/* This defines the natural memory order supported by this
- * architecture before guarantees made by various barrier
- * instructions.
- *
- * The x86 has a pretty strong memory ordering which only really
- * allows for some stores to be re-ordered after loads.
- */
-#include "tcg/tcg-mo.h"
-
-#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
-
 #endif
diff --git a/tcg/loongarch64/tcg-target-mo.h b/tcg/loongarch64/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/loongarch64/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2021 WANG Xuerui <git@xen0n.name>
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_VEC_TMP0 = TCG_REG_V23,
 } TCGReg;

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif /* LOONGARCH_TCG_TARGET_H */
diff --git a/tcg/mips/tcg-target-mo.h b/tcg/mips/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/mips/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_AREG0 = TCG_REG_S8,
 } TCGReg;

-#define TCG_TARGET_DEFAULT_MO 0
-
 #endif
diff --git a/tcg/ppc/tcg-target-mo.h b/tcg/ppc/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/ppc/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_AREG0 = TCG_REG_R27
 } TCGReg;

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif
diff --git a/tcg/riscv/tcg-target-mo.h b/tcg/riscv/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/riscv/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2018 SiFive, Inc
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_REG_TMP2 = TCG_REG_T4,
 } TCGReg;

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif
diff --git a/tcg/s390x/tcg-target-mo.h b/tcg/s390x/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/s390x/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+
+#endif
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum TCGReg {

 #define TCG_TARGET_NB_REGS 64

-#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
-
 #endif
diff --git a/tcg/sparc64/tcg-target-mo.h b/tcg/sparc64/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/sparc64/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_AREG0 TCG_REG_I0

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif
diff --git a/tcg/tci/tcg-target-mo.h b/tcg/tci/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/tci/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2009, 2011 Stefan Weil
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+/*
+ * We could notice __i386__ or __s390x__ and reduce the barriers depending
+ * on the host. But if you want performance, you use the normal backend.
+ * We prefer consistency across hosts on this.
+ */
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define HAVE_TCG_QEMU_TB_EXEC
 #define TCG_TARGET_NEED_POOL_LABELS

-/* We could notice __i386__ or __s390x__ and reduce the barriers depending
- on the host. But if you want performance, you use the normal backend.
- We prefer consistency across hosts on this. */
-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif /* TCG_TARGET_H */
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -XXX,XX +XXX,XX @@
 #include "exec/plugin-gen.h"
 #include "tcg-internal.h"
 #include "tcg-has.h"
+#include "tcg-target-mo.h"

 static void check_max_alignment(unsigned a_bits)
 {
--
2.43.0
Sign repetitions are perforce all identical, whether they are 1 or 0.
Bitwise operations preserve the relative quantity of the repetitions.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
 z2 = arg_info(op->args[2])->z_mask;
 ctx->z_mask = z1 & z2;

+ /*
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
+ * Bitwise operations preserve the relative quantity of the repetitions.
+ */
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+
 /*
 * Known-zeros does not imply known-ones. Therefore unless
 * arg2 is constant, we can't infer affected bits from it.
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
 }
 ctx->z_mask = z1;

+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return fold_masks(ctx, op);
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
 fold_xi_to_not(ctx, op, 0)) {
 return true;
 }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return false;
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)

 ctx->z_mask = arg_info(op->args[3])->z_mask
 | arg_info(op->args[4])->z_mask;
+ ctx->s_mask = arg_info(op->args[3])->s_mask
+ & arg_info(op->args[4])->s_mask;

 if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
 uint64_t tv = arg_info(op->args[3])->val;
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
 fold_xi_to_not(ctx, op, -1)) {
 return true;
 }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return false;
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_nor(OptContext *ctx, TCGOp *op)
 fold_xi_to_not(ctx, op, 0)) {
 return true;
 }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return false;
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
 return true;
 }

+ ctx->s_mask = arg_info(op->args[1])->s_mask;
+
 /* Because of fold_to_not, we want to always return true, via finish. */
 finish_folding(ctx, op);
 return true;
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)

 ctx->z_mask = arg_info(op->args[1])->z_mask
 | arg_info(op->args[2])->z_mask;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return fold_masks(ctx, op);
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
 fold_ix_to_not(ctx, op, 0)) {
 return true;
 }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return false;
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)

 ctx->z_mask = arg_info(op->args[1])->z_mask
 | arg_info(op->args[2])->z_mask;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return fold_masks(ctx, op);
 }

--
2.25.1
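
Aside: a concrete instance of the sign-repetition argument. If the top bits of both inputs repeat their sign bit, the AND keeps at least the smaller run of repetitions, which is what intersecting the two s_mask values records:

    /* Sketch only: x sign-repeats down to bit 4, y down to bit 7;
     * x & y still sign-repeats down to bit 7, the smaller run. */
    int64_t x = -16;       /* 0xfffffffffffffff0 */
    int64_t y = 0x7f;      /* 0x000000000000007f */
    int64_t r = x & y;     /* 0x0000000000000070 */
    assert(r >> 7 == 0);   /* bits 63..7 all equal the sign bit */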
Return C_NotImplemented instead of asserting for opcodes
not implemented by the backend. For now, the assertion
moves to process_op_defs.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.c | 10 ++++++----
tcg/aarch64/tcg-target.c.inc | 2 +-
tcg/arm/tcg-target.c.inc | 2 +-
tcg/i386/tcg-target.c.inc | 2 +-
tcg/loongarch64/tcg-target.c.inc | 2 +-
tcg/mips/tcg-target.c.inc | 2 +-
tcg/ppc/tcg-target.c.inc | 2 +-
tcg/riscv/tcg-target.c.inc | 2 +-
tcg/s390x/tcg-target.c.inc | 2 +-
tcg/sparc64/tcg-target.c.inc | 2 +-
tcg/tci/tcg-target.c.inc | 2 +-
11 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static int tcg_out_pool_finalize(TCGContext *s)
 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

 typedef enum {
+ C_NotImplemented = -1,
 #include "tcg-target-con-set.h"
 } TCGConstraintSetIndex;

@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
 const TCGTargetOpDef *tdefs;
 bool saw_alias_pair = false;
 int i, o, i2, o2, nb_args;
+ TCGConstraintSetIndex con_set;

 if (def->flags & TCG_OPF_NOT_PRESENT) {
 continue;
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)

 /*
 * Macro magic should make it impossible, but double-check that
- * the array index is in range. Since the signness of an enum
- * is implementation defined, force the result to unsigned.
+ * the array index is in range. At the same time, double-check
+ * that the opcode is implemented, i.e. not C_NotImplemented.
 */
- unsigned con_set = tcg_target_op_def(op);
- tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
+ con_set = tcg_target_op_def(op);
+ tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
 tdefs = &constraint_sets[con_set];

 for (i = 0; i < nb_args; i++) {
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return C_O1_I2(w, 0, w);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 case INDEX_op_bitsel_vec:
 return C_O1_I3(w, w, w, w);
 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return C_O1_I4(x, x, x, xO, x);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return C_O1_I3(w, w, w, w);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 : C_O0_I4(rZ, rZ, r, r));

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return C_O1_I4(v, v, v, vZM, v);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 case INDEX_op_cmpsel_vec:
 return C_O1_I4(v, v, vL, vK, vK);
 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 : C_O1_I4(v, v, v, vZ, v));

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return C_O1_I2(r, r, r);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

--
2.43.0
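
Aside: the shape of the new backend contract, sketched with a hypothetical backend. The opcode and constraint-set names follow tcg-opc.h and tcg-target-con-set.h, but the function below is illustrative, not any real backend:

    /* Sketch only: report unhandled opcodes with the sentinel instead of
     * asserting; the single assertion now lives in process_op_defs(). */
    static TCGConstraintSetIndex example_op_def(TCGOpcode op)
    {
        switch (op) {
        case INDEX_op_ld_i32:
            return C_O1_I1(r, r);
        default:
            return C_NotImplemented;
        }
    }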
1
Compute the type of the operation early.
1
Test each vector type, not just lumping them all together.
2
Add tests for I32 (always true) and I64 (64-bit hosts).
2
3
3
There are at least 4 places that used a def->flags ladder
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
to determine the type of the operation being optimized.
5
6
There were two places that assumed !TCG_OPF_64BIT means
7
TCG_TYPE_I32, and so could potentially compute incorrect
8
results for vector operations.
9
10
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
6
---
13
tcg/optimize.c | 149 +++++++++++++++++++++++++++++--------------------
7
tcg/tcg.c | 66 ++++++++++++++++++++++++++++++++++++-------------------
14
1 file changed, 89 insertions(+), 60 deletions(-)
8
1 file changed, 43 insertions(+), 23 deletions(-)
15
9
16
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
17
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/optimize.c
12
--- a/tcg/tcg.c
19
+++ b/tcg/optimize.c
13
+++ b/tcg/tcg.c
20
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
14
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcgv_i32_temp(TCGv_i32 v)
21
15
*/
22
/* In flight values from optimization. */
16
bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
23
uint64_t z_mask;
24
+ TCGType type;
25
} OptContext;
26
27
static inline TempOptInfo *ts_info(TCGTemp *ts)
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
29
{
17
{
30
TCGTemp *dst_ts = arg_temp(dst);
18
- const bool have_vec
31
TCGTemp *src_ts = arg_temp(src);
19
- = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
32
- const TCGOpDef *def;
20
+ bool has_type;
33
TempOptInfo *di;
34
TempOptInfo *si;
35
uint64_t z_mask;
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
37
reset_ts(dst_ts);
38
di = ts_info(dst_ts);
39
si = ts_info(src_ts);
40
- def = &tcg_op_defs[op->opc];
41
- if (def->flags & TCG_OPF_VECTOR) {
42
- new_op = INDEX_op_mov_vec;
43
- } else if (def->flags & TCG_OPF_64BIT) {
44
- new_op = INDEX_op_mov_i64;
45
- } else {
46
+
21
+
47
+ switch (ctx->type) {
22
+ switch (type) {
48
+ case TCG_TYPE_I32:
23
+ case TCG_TYPE_I32:
49
new_op = INDEX_op_mov_i32;
24
+ has_type = true;
50
+ break;
25
+ break;
51
+ case TCG_TYPE_I64:
26
+ case TCG_TYPE_I64:
52
+ new_op = INDEX_op_mov_i64;
27
+ has_type = TCG_TARGET_REG_BITS == 64;
53
+ break;
28
+ break;
54
+ case TCG_TYPE_V64:
29
+ case TCG_TYPE_V64:
30
+ has_type = TCG_TARGET_HAS_v64;
31
+ break;
55
+ case TCG_TYPE_V128:
32
+ case TCG_TYPE_V128:
33
+ has_type = TCG_TARGET_HAS_v128;
34
+ break;
56
+ case TCG_TYPE_V256:
35
+ case TCG_TYPE_V256:
57
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
36
+ has_type = TCG_TARGET_HAS_v256;
58
+ new_op = INDEX_op_mov_vec;
59
+ break;
37
+ break;
60
+ default:
38
+ default:
61
+ g_assert_not_reached();
39
+ has_type = false;
62
}
40
+ break;
63
op->opc = new_op;
41
+ }
64
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
42
65
op->args[0] = dst;
43
switch (op) {
66
op->args[1] = src;
44
case INDEX_op_discard:
67
45
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
46
case INDEX_op_or_vec:
69
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
47
case INDEX_op_xor_vec:
70
TCGArg dst, uint64_t val)
48
case INDEX_op_cmp_vec:
71
{
49
- return have_vec;
72
- const TCGOpDef *def = &tcg_op_defs[op->opc];
50
+ return has_type;
73
- TCGType type;
51
case INDEX_op_dup2_vec:
74
- TCGTemp *tv;
52
- return have_vec && TCG_TARGET_REG_BITS == 32;
75
-
53
+ return has_type && TCG_TARGET_REG_BITS == 32;
76
- if (def->flags & TCG_OPF_VECTOR) {
54
case INDEX_op_not_vec:
77
- type = TCGOP_VECL(op) + TCG_TYPE_V64;
55
- return have_vec && TCG_TARGET_HAS_not_vec;
78
- } else if (def->flags & TCG_OPF_64BIT) {
56
+ return has_type && TCG_TARGET_HAS_not_vec;
79
- type = TCG_TYPE_I64;
57
case INDEX_op_neg_vec:
80
- } else {
58
- return have_vec && TCG_TARGET_HAS_neg_vec;
81
- type = TCG_TYPE_I32;
59
+ return has_type && TCG_TARGET_HAS_neg_vec;
82
- }
60
case INDEX_op_abs_vec:
83
-
61
- return have_vec && TCG_TARGET_HAS_abs_vec;
84
/* Convert movi to mov with constant temp. */
62
+ return has_type && TCG_TARGET_HAS_abs_vec;
85
- tv = tcg_constant_internal(type, val);
63
case INDEX_op_andc_vec:
86
+ TCGTemp *tv = tcg_constant_internal(ctx->type, val);
64
- return have_vec && TCG_TARGET_HAS_andc_vec;
87
+
65
+ return has_type && TCG_TARGET_HAS_andc_vec;
88
init_ts_info(ctx, tv);
66
case INDEX_op_orc_vec:
89
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
67
- return have_vec && TCG_TARGET_HAS_orc_vec;
90
}
68
+ return has_type && TCG_TARGET_HAS_orc_vec;
91
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
69
case INDEX_op_nand_vec:
92
}
70
- return have_vec && TCG_TARGET_HAS_nand_vec;
93
}
71
+ return has_type && TCG_TARGET_HAS_nand_vec;
94
72
case INDEX_op_nor_vec:
95
-static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
73
- return have_vec && TCG_TARGET_HAS_nor_vec;
96
+static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
74
+ return has_type && TCG_TARGET_HAS_nor_vec;
97
+ uint64_t x, uint64_t y)
75
case INDEX_op_eqv_vec:
98
{
76
- return have_vec && TCG_TARGET_HAS_eqv_vec;
99
- const TCGOpDef *def = &tcg_op_defs[op];
77
+ return has_type && TCG_TARGET_HAS_eqv_vec;
100
uint64_t res = do_constant_folding_2(op, x, y);
78
case INDEX_op_mul_vec:
101
- if (!(def->flags & TCG_OPF_64BIT)) {
79
- return have_vec && TCG_TARGET_HAS_mul_vec;
102
+ if (type == TCG_TYPE_I32) {
80
+ return has_type && TCG_TARGET_HAS_mul_vec;
103
res = (int32_t)res;
81
case INDEX_op_shli_vec:
104
}
82
case INDEX_op_shri_vec:
105
return res;
83
case INDEX_op_sari_vec:
106
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
84
- return have_vec && TCG_TARGET_HAS_shi_vec;
107
* Return -1 if the condition can't be simplified,
85
+ return has_type && TCG_TARGET_HAS_shi_vec;
108
* and the result of the condition (0 or 1) if it can.
86
case INDEX_op_shls_vec:
109
*/
87
case INDEX_op_shrs_vec:
110
-static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
88
case INDEX_op_sars_vec:
111
+static int do_constant_folding_cond(TCGType type, TCGArg x,
89
- return have_vec && TCG_TARGET_HAS_shs_vec;
112
TCGArg y, TCGCond c)
90
+ return has_type && TCG_TARGET_HAS_shs_vec;
113
{
91
case INDEX_op_shlv_vec:
114
uint64_t xv = arg_info(x)->val;
92
case INDEX_op_shrv_vec:
115
uint64_t yv = arg_info(y)->val;
93
case INDEX_op_sarv_vec:
116
94
- return have_vec && TCG_TARGET_HAS_shv_vec;
117
if (arg_is_const(x) && arg_is_const(y)) {
95
+ return has_type && TCG_TARGET_HAS_shv_vec;
118
- const TCGOpDef *def = &tcg_op_defs[op];
96
case INDEX_op_rotli_vec:
119
- tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
97
- return have_vec && TCG_TARGET_HAS_roti_vec;
120
- if (def->flags & TCG_OPF_64BIT) {
98
+ return has_type && TCG_TARGET_HAS_roti_vec;
121
- return do_constant_folding_cond_64(xv, yv, c);
99
case INDEX_op_rotls_vec:
122
- } else {
100
- return have_vec && TCG_TARGET_HAS_rots_vec;
123
+ switch (type) {
101
+ return has_type && TCG_TARGET_HAS_rots_vec;
124
+ case TCG_TYPE_I32:
102
case INDEX_op_rotlv_vec:
125
return do_constant_folding_cond_32(xv, yv, c);
103
case INDEX_op_rotrv_vec:
126
+ case TCG_TYPE_I64:
104
- return have_vec && TCG_TARGET_HAS_rotv_vec;
127
+ return do_constant_folding_cond_64(xv, yv, c);
105
+ return has_type && TCG_TARGET_HAS_rotv_vec;
128
+ default:
106
case INDEX_op_ssadd_vec:
129
+ /* Only scalar comparisons are optimizable */
107
case INDEX_op_usadd_vec:
130
+ return -1;
108
case INDEX_op_sssub_vec:
131
}
109
case INDEX_op_ussub_vec:
132
} else if (args_are_copies(x, y)) {
110
- return have_vec && TCG_TARGET_HAS_sat_vec;
133
return do_constant_folding_cond_eq(c);
111
+ return has_type && TCG_TARGET_HAS_sat_vec;
134
@@ -XXX,XX +XXX,XX @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
112
case INDEX_op_smin_vec:
135
uint64_t t;
113
case INDEX_op_umin_vec:
136
114
case INDEX_op_smax_vec:
137
t = arg_info(op->args[1])->val;
115
case INDEX_op_umax_vec:
138
- t = do_constant_folding(op->opc, t, 0);
116
- return have_vec && TCG_TARGET_HAS_minmax_vec;
139
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
117
+ return has_type && TCG_TARGET_HAS_minmax_vec;
140
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
118
case INDEX_op_bitsel_vec:
141
}
119
- return have_vec && TCG_TARGET_HAS_bitsel_vec;
142
return false;
120
+ return has_type && TCG_TARGET_HAS_bitsel_vec;
143
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
121
case INDEX_op_cmpsel_vec:
144
uint64_t t1 = arg_info(op->args[1])->val;
122
- return have_vec && TCG_TARGET_HAS_cmpsel_vec;
145
uint64_t t2 = arg_info(op->args[2])->val;
123
+ return has_type && TCG_TARGET_HAS_cmpsel_vec;
146
124
147
- t1 = do_constant_folding(op->opc, t1, t2);
125
default:
148
+ t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
126
tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
149
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
150
}
151
return false;
152
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
153
static bool fold_brcond(OptContext *ctx, TCGOp *op)
154
{
155
TCGCond cond = op->args[2];
156
- int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
157
+ int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
158
159
if (i == 0) {
160
tcg_op_remove(ctx->tcg, op);
161
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
162
* Simplify EQ/NE comparisons where one of the pairs
163
* can be simplified.
164
*/
165
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
166
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
167
op->args[2], cond);
168
switch (i ^ inv) {
169
case 0:
170
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
171
goto do_brcond_high;
172
}
173
174
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
175
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
176
op->args[3], cond);
177
switch (i ^ inv) {
178
case 0:
179
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
180
if (arg_is_const(op->args[1])) {
181
uint64_t t = arg_info(op->args[1])->val;
182
183
- t = do_constant_folding(op->opc, t, op->args[2]);
184
+ t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
185
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
186
}
187
return false;
188
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
189
uint64_t t = arg_info(op->args[1])->val;
190
191
if (t != 0) {
- t = do_constant_folding(op->opc, t, 0);
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)

static bool fold_movcond(OptContext *ctx, TCGOp *op)
{
- TCGOpcode opc = op->opc;
TCGCond cond = op->args[5];
- int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);

if (i >= 0) {
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
uint64_t tv = arg_info(op->args[3])->val;
uint64_t fv = arg_info(op->args[4])->val;
+ TCGOpcode opc;

- opc = (opc == INDEX_op_movcond_i32
- ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ opc = INDEX_op_setcond_i32;
+ break;
+ case TCG_TYPE_I64:
+ opc = INDEX_op_setcond_i64;
+ break;
+ default:
+ g_assert_not_reached();
+ }

if (tv == 1 && fv == 0) {
op->opc = opc;
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
static bool fold_setcond(OptContext *ctx, TCGOp *op)
{
TCGCond cond = op->args[3];
- int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);

if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
* Simplify EQ/NE comparisons where one of the pairs
* can be simplified.
*/
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
op->args[3], cond);
switch (i ^ inv) {
case 0:
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
goto do_setcond_high;
}

- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
op->args[4], cond);
switch (i ^ inv) {
case 0:
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);

+ /* Pre-compute the type of the operation. */
+ if (def->flags & TCG_OPF_VECTOR) {
+ ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
+ } else if (def->flags & TCG_OPF_64BIT) {
+ ctx.type = TCG_TYPE_I64;
+ } else {
+ ctx.type = TCG_TYPE_I32;
+ }
+
/* For commutative operations make constant second argument */
switch (opc) {
CASE_OP_32_64_VEC(add):
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
/* Proceed with possible constant folding. */
break;
}
- if (opc == INDEX_op_sub_i32) {
+ switch (ctx.type) {
+ case TCG_TYPE_I32:
neg_op = INDEX_op_neg_i32;
have_neg = TCG_TARGET_HAS_neg_i32;
- } else if (opc == INDEX_op_sub_i64) {
+ break;
+ case TCG_TYPE_I64:
neg_op = INDEX_op_neg_i64;
have_neg = TCG_TARGET_HAS_neg_i64;
- } else if (TCG_TARGET_HAS_neg_vec) {
- TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
- unsigned vece = TCGOP_VECE(op);
- neg_op = INDEX_op_neg_vec;
- have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
- } else {
break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ neg_op = INDEX_op_neg_vec;
+ have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
+ TCGOP_VECE(op)) > 0;
+ break;
+ default:
+ g_assert_not_reached();
}
if (!have_neg) {
break;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
TCGOpcode not_op;
bool have_not;

- if (def->flags & TCG_OPF_VECTOR) {
- not_op = INDEX_op_not_vec;
- have_not = TCG_TARGET_HAS_not_vec;
- } else if (def->flags & TCG_OPF_64BIT) {
- not_op = INDEX_op_not_i64;
- have_not = TCG_TARGET_HAS_not_i64;
- } else {
+ switch (ctx.type) {
+ case TCG_TYPE_I32:
not_op = INDEX_op_not_i32;
have_not = TCG_TARGET_HAS_not_i32;
+ break;
+ case TCG_TYPE_I64:
+ not_op = INDEX_op_not_i64;
+ have_not = TCG_TARGET_HAS_not_i64;
+ break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ not_op = INDEX_op_not_vec;
+ have_not = TCG_TARGET_HAS_not_vec;
+ break;
+ default:
+ g_assert_not_reached();
}
if (!have_not) {
break;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
below, we can ignore high bits, but for further optimizations we
need to record that the high bits contain garbage. */
partmask = z_mask;
- if (!(def->flags & TCG_OPF_64BIT)) {
+ if (ctx.type == TCG_TYPE_I32) {
z_mask |= ~(tcg_target_ulong)0xffffffffu;
partmask &= 0xffffffffu;
affected &= 0xffffffffu;
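The effect of this 32-bit case can be checked with a self-contained
sketch (plain C with made-up values; not QEMU code, and uint64_t
stands in for tcg_target_ulong):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Suppose folding proved only the low byte can be nonzero. */
    uint64_t z_mask = 0xff;     /* zero bits are known-zero */
    uint64_t partmask = z_mask; /* used only for the result-zero test */
    int op_is_32bit = 1;

    if (op_is_32bit) {
        /* High bits are garbage, so they are no longer known-zero... */
        z_mask |= ~(uint64_t)0xffffffffu;
        /* ...but they may be ignored when testing the result for zero. */
        partmask &= 0xffffffffu;
    }
    printf("z_mask=%#" PRIx64 " partmask=%#" PRIx64 "\n", z_mask, partmask);
    return 0;
}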
--
2.25.1

Adjust the interface to take the OptContext parameter instead
of TCGContext or both.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 67 +++++++++++++++++++++++++-------------------------
 1 file changed, 34 insertions(+), 33 deletions(-)

Process each TCGConstraintSetIndex first. Allocate TCGArgConstraint
arrays based on those. Only afterward process the TCGOpcodes and
share those TCGArgConstraint arrays.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |   7 +-
 tcg/tcg.c         | 272 +++++++++++++++++++++++-----------------------
 2 files changed, 136 insertions(+), 143 deletions(-)

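The sharing described in the second message can be tried as a
standalone sketch (simplified stand-in types and tables, not the
actual TCG definitions):

#include <stdio.h>

typedef struct { const char *ct_str; } ArgConstraint;

/* Pass 1: build one constraint array per *set*. */
static const ArgConstraint all_cts[2][3] = {
    { { "r" }, { "r" } },           /* like C_O1_I1(r, r)     */
    { { "r" }, { "r" }, { "ri" } }, /* like C_O1_I2(r, r, ri) */
};

/* Pass 2: map opcodes onto the sets; many opcodes share one array. */
static const int op_to_set[] = { 0, 1, 1 }; /* neg, add, sub */

int main(void)
{
    printf("add and sub share one array: %s\n",
           op_to_set[1] == op_to_set[2] ? "yes" : "no");
    printf("second input constraint of add: %s\n",
           all_cts[op_to_set[1]][2].ct_str);
    return 0;
}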
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
--- a/include/tcg/tcg.h
14
+++ b/tcg/optimize.c
15
+++ b/include/tcg/tcg.h
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
16
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
16
} TempOptInfo;
17
const char *name;
17
18
uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
18
typedef struct OptContext {
19
uint8_t flags;
19
+ TCGContext *tcg;
20
- TCGArgConstraint *args_ct;
20
TCGTempSet temps_used;
21
+ const TCGArgConstraint *args_ct;
21
} OptContext;
22
} TCGOpDef;
22
23
23
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
24
extern TCGOpDef tcg_op_defs[];
24
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
25
extern const size_t tcg_op_defs_max;
26
27
-typedef struct TCGTargetOpDef {
28
- TCGOpcode op;
29
- const char *args_ct_str[TCG_MAX_OP_ARGS];
30
-} TCGTargetOpDef;
31
-
32
/*
33
* tcg_op_supported:
34
* Query if @op, for @type and @flags, is supported by the host
35
diff --git a/tcg/tcg.c b/tcg/tcg.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/tcg/tcg.c
38
+++ b/tcg/tcg.c
39
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
40
41
/* Put all of the constraint sets into an array, indexed by the enum. */
42
43
-#define C_O0_I1(I1) { .args_ct_str = { #I1 } },
44
-#define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
45
-#define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
46
-#define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
47
+typedef struct TCGConstraintSet {
48
+ uint8_t nb_oargs, nb_iargs;
49
+ const char *args_ct_str[TCG_MAX_OP_ARGS];
50
+} TCGConstraintSet;
51
52
-#define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
53
-#define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
54
-#define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
55
-#define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
56
+#define C_O0_I1(I1) { 0, 1, { #I1 } },
57
+#define C_O0_I2(I1, I2) { 0, 2, { #I1, #I2 } },
58
+#define C_O0_I3(I1, I2, I3) { 0, 3, { #I1, #I2, #I3 } },
59
+#define C_O0_I4(I1, I2, I3, I4) { 0, 4, { #I1, #I2, #I3, #I4 } },
60
61
-#define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
62
-#define C_N1O1_I1(O1, O2, I1) { .args_ct_str = { "&" #O1, #O2, #I1 } },
63
-#define C_N2_I1(O1, O2, I1) { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },
64
+#define C_O1_I1(O1, I1) { 1, 1, { #O1, #I1 } },
65
+#define C_O1_I2(O1, I1, I2) { 1, 2, { #O1, #I1, #I2 } },
66
+#define C_O1_I3(O1, I1, I2, I3) { 1, 3, { #O1, #I1, #I2, #I3 } },
67
+#define C_O1_I4(O1, I1, I2, I3, I4) { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
68
69
-#define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
70
-#define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
71
-#define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
72
-#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
73
-#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
74
+#define C_N1_I2(O1, I1, I2) { 1, 2, { "&" #O1, #I1, #I2 } },
75
+#define C_N1O1_I1(O1, O2, I1) { 2, 1, { "&" #O1, #O2, #I1 } },
76
+#define C_N2_I1(O1, O2, I1) { 2, 1, { "&" #O1, "&" #O2, #I1 } },
77
78
-static const TCGTargetOpDef constraint_sets[] = {
79
+#define C_O2_I1(O1, O2, I1) { 2, 1, { #O1, #O2, #I1 } },
80
+#define C_O2_I2(O1, O2, I1, I2) { 2, 2, { #O1, #O2, #I1, #I2 } },
81
+#define C_O2_I3(O1, O2, I1, I2, I3) { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
82
+#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
83
+#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
84
+
85
+static const TCGConstraintSet constraint_sets[] = {
86
#include "tcg-target-con-set.h"
87
};
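The include-driven table above relies on macro stringification; the
technique can be exercised standalone (illustrative names, with a
local list in place of the included header):

#include <stdio.h>

typedef struct {
    unsigned char nb_oargs, nb_iargs;
    const char *args_ct_str[4];
} ConstraintSet;

/* Each C_* macro expands to one row; # turns letters into strings. */
#define C_O1_I1(O1, I1)     { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2) { 1, 2, { #O1, #I1, #I2 } },

static const ConstraintSet constraint_sets[] = {
    C_O1_I1(r, r)
    C_O1_I2(r, r, ri)
};

int main(void)
{
    printf("%u sets; set 1, input 2: %s\n",
           (unsigned)(sizeof(constraint_sets) / sizeof(constraint_sets[0])),
           constraint_sets[1].args_ct_str[2]);
    return 0;
}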
88
89
-
90
#undef C_O0_I1
91
#undef C_O0_I2
92
#undef C_O0_I3
93
@@ -XXX,XX +XXX,XX @@ static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
94
static void tcg_context_init(unsigned max_cpus)
95
{
96
TCGContext *s = &tcg_init_ctx;
97
- int op, total_args, n, i;
98
- TCGOpDef *def;
99
- TCGArgConstraint *args_ct;
100
+ int n, i;
101
TCGTemp *ts;
102
103
memset(s, 0, sizeof(*s));
104
s->nb_globals = 0;
105
106
- /* Count total number of arguments and allocate the corresponding
107
- space */
108
- total_args = 0;
109
- for(op = 0; op < NB_OPS; op++) {
110
- def = &tcg_op_defs[op];
111
- n = def->nb_iargs + def->nb_oargs;
112
- total_args += n;
113
- }
114
-
115
- args_ct = g_new0(TCGArgConstraint, total_args);
116
-
117
- for(op = 0; op < NB_OPS; op++) {
118
- def = &tcg_op_defs[op];
119
- def->args_ct = args_ct;
120
- n = def->nb_iargs + def->nb_oargs;
121
- args_ct += n;
122
- }
123
-
124
init_call_layout(&info_helper_ld32_mmu);
125
init_call_layout(&info_helper_ld64_mmu);
126
init_call_layout(&info_helper_ld128_mmu);
127
@@ -XXX,XX +XXX,XX @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
25
}
128
}
26
129
27
-static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
130
/* we give more priority to constraints with less registers */
28
+static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
131
-static int get_constraint_priority(const TCGOpDef *def, int k)
132
+static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
29
{
133
{
30
TCGTemp *dst_ts = arg_temp(dst);
134
- const TCGArgConstraint *arg_ct = &def->args_ct[k];
31
TCGTemp *src_ts = arg_temp(src);
135
- int n = ctpop64(arg_ct->regs);
32
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
136
+ int n;
33
TCGOpcode new_op;
137
+
34
138
+ arg_ct += k;
35
if (ts_are_copies(dst_ts, src_ts)) {
139
+ n = ctpop64(arg_ct->regs);
36
- tcg_op_remove(s, op);
140
37
+ tcg_op_remove(ctx->tcg, op);
141
/*
38
return;
142
* Sort constraints of a single register first, which includes output
143
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
144
}
145
146
/* sort from highest priority to lowest */
147
-static void sort_constraints(TCGOpDef *def, int start, int n)
148
+static void sort_constraints(TCGArgConstraint *a, int start, int n)
149
{
150
int i, j;
151
- TCGArgConstraint *a = def->args_ct;
152
153
for (i = 0; i < n; i++) {
154
a[start + i].sort_index = start + i;
155
@@ -XXX,XX +XXX,XX @@ static void sort_constraints(TCGOpDef *def, int start, int n)
39
}
156
}
40
157
for (i = 0; i < n - 1; i++) {
41
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
158
for (j = i + 1; j < n; j++) {
159
- int p1 = get_constraint_priority(def, a[start + i].sort_index);
160
- int p2 = get_constraint_priority(def, a[start + j].sort_index);
161
+ int p1 = get_constraint_priority(a, a[start + i].sort_index);
162
+ int p2 = get_constraint_priority(a, a[start + j].sort_index);
163
if (p1 < p2) {
164
int tmp = a[start + i].sort_index;
165
a[start + i].sort_index = a[start + j].sort_index;
166
@@ -XXX,XX +XXX,XX @@ static void sort_constraints(TCGOpDef *def, int start, int n)
42
}
167
}
43
}
168
}
44
169
45
-static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
170
+static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
46
- TCGOp *op, TCGArg dst, uint64_t val)
171
+static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
47
+static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
172
+
48
+ TCGArg dst, uint64_t val)
173
static void process_op_defs(TCGContext *s)
49
{
174
{
50
const TCGOpDef *def = &tcg_op_defs[op->opc];
175
- TCGOpcode op;
51
TCGType type;
176
-
52
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
177
- for (op = 0; op < NB_OPS; op++) {
53
/* Convert movi to mov with constant temp. */
178
- TCGOpDef *def = &tcg_op_defs[op];
54
tv = tcg_constant_internal(type, val);
179
- const TCGTargetOpDef *tdefs;
55
init_ts_info(ctx, tv);
180
+ for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
56
- tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
181
+ const TCGConstraintSet *tdefs = &constraint_sets[c];
57
+ tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
182
+ TCGArgConstraint *args_ct = all_cts[c];
58
}
183
+ int nb_oargs = tdefs->nb_oargs;
59
184
+ int nb_iargs = tdefs->nb_iargs;
60
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
185
+ int nb_args = nb_oargs + nb_iargs;
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
186
bool saw_alias_pair = false;
62
{
187
- int i, o, i2, o2, nb_args;
63
int nb_temps, nb_globals, i;
188
- TCGConstraintSetIndex con_set;
64
TCGOp *op, *op_next, *prev_mb = NULL;
189
65
- OptContext ctx = {};
190
- if (def->flags & TCG_OPF_NOT_PRESENT) {
66
+ OptContext ctx = { .tcg = s };
191
- continue;
67
192
- }
68
/* Array VALS has an element for each temp.
193
-
69
If this temp holds a constant then its value is kept in VALS' element.
194
- nb_args = def->nb_iargs + def->nb_oargs;
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
195
- if (nb_args == 0) {
71
CASE_OP_32_64(rotr):
196
- continue;
72
if (arg_is_const(op->args[1])
197
- }
73
&& arg_info(op->args[1])->val == 0) {
198
-
74
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
199
- /*
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
200
- * Macro magic should make it impossible, but double-check that
201
- * the array index is in range. At the same time, double-check
202
- * that the opcode is implemented, i.e. not C_NotImplemented.
203
- */
204
- con_set = tcg_target_op_def(op);
205
- tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
206
- tdefs = &constraint_sets[con_set];
207
-
208
- for (i = 0; i < nb_args; i++) {
209
+ for (int i = 0; i < nb_args; i++) {
210
const char *ct_str = tdefs->args_ct_str[i];
211
- bool input_p = i >= def->nb_oargs;
212
-
213
- /* Incomplete TCGTargetOpDef entry. */
214
- tcg_debug_assert(ct_str != NULL);
215
+ bool input_p = i >= nb_oargs;
216
+ int o;
217
218
switch (*ct_str) {
219
case '0' ... '9':
220
o = *ct_str - '0';
221
tcg_debug_assert(input_p);
222
- tcg_debug_assert(o < def->nb_oargs);
223
- tcg_debug_assert(def->args_ct[o].regs != 0);
224
- tcg_debug_assert(!def->args_ct[o].oalias);
225
- def->args_ct[i] = def->args_ct[o];
226
+ tcg_debug_assert(o < nb_oargs);
227
+ tcg_debug_assert(args_ct[o].regs != 0);
228
+ tcg_debug_assert(!args_ct[o].oalias);
229
+ args_ct[i] = args_ct[o];
230
/* The output sets oalias. */
231
- def->args_ct[o].oalias = 1;
232
- def->args_ct[o].alias_index = i;
233
+ args_ct[o].oalias = 1;
234
+ args_ct[o].alias_index = i;
235
/* The input sets ialias. */
236
- def->args_ct[i].ialias = 1;
237
- def->args_ct[i].alias_index = o;
238
- if (def->args_ct[i].pair) {
239
+ args_ct[i].ialias = 1;
240
+ args_ct[i].alias_index = o;
241
+ if (args_ct[i].pair) {
242
saw_alias_pair = true;
243
}
244
tcg_debug_assert(ct_str[1] == '\0');
245
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
246
247
case '&':
248
tcg_debug_assert(!input_p);
249
- def->args_ct[i].newreg = true;
250
+ args_ct[i].newreg = true;
251
ct_str++;
252
break;
253
254
case 'p': /* plus */
255
/* Allocate to the register after the previous. */
256
- tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
257
+ tcg_debug_assert(i > (input_p ? nb_oargs : 0));
258
o = i - 1;
259
- tcg_debug_assert(!def->args_ct[o].pair);
260
- tcg_debug_assert(!def->args_ct[o].ct);
261
- def->args_ct[i] = (TCGArgConstraint){
262
+ tcg_debug_assert(!args_ct[o].pair);
263
+ tcg_debug_assert(!args_ct[o].ct);
264
+ args_ct[i] = (TCGArgConstraint){
265
.pair = 2,
266
.pair_index = o,
267
- .regs = def->args_ct[o].regs << 1,
268
- .newreg = def->args_ct[o].newreg,
269
+ .regs = args_ct[o].regs << 1,
270
+ .newreg = args_ct[o].newreg,
271
};
272
- def->args_ct[o].pair = 1;
273
- def->args_ct[o].pair_index = i;
274
+ args_ct[o].pair = 1;
275
+ args_ct[o].pair_index = i;
276
tcg_debug_assert(ct_str[1] == '\0');
277
continue;
278
279
case 'm': /* minus */
280
/* Allocate to the register before the previous. */
281
- tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
282
+ tcg_debug_assert(i > (input_p ? nb_oargs : 0));
283
o = i - 1;
284
- tcg_debug_assert(!def->args_ct[o].pair);
285
- tcg_debug_assert(!def->args_ct[o].ct);
286
- def->args_ct[i] = (TCGArgConstraint){
287
+ tcg_debug_assert(!args_ct[o].pair);
288
+ tcg_debug_assert(!args_ct[o].ct);
289
+ args_ct[i] = (TCGArgConstraint){
290
.pair = 1,
291
.pair_index = o,
292
- .regs = def->args_ct[o].regs >> 1,
293
- .newreg = def->args_ct[o].newreg,
294
+ .regs = args_ct[o].regs >> 1,
295
+ .newreg = args_ct[o].newreg,
296
};
297
- def->args_ct[o].pair = 2;
298
- def->args_ct[o].pair_index = i;
299
+ args_ct[o].pair = 2;
300
+ args_ct[o].pair_index = i;
301
tcg_debug_assert(ct_str[1] == '\0');
76
continue;
302
continue;
77
}
303
}
78
break;
304
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
305
do {
80
if (!arg_is_const(op->args[1])
306
switch (*ct_str) {
81
&& arg_is_const(op->args[2])
307
case 'i':
82
&& arg_info(op->args[2])->val == 0) {
308
- def->args_ct[i].ct |= TCG_CT_CONST;
83
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
309
+ args_ct[i].ct |= TCG_CT_CONST;
84
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
310
break;
85
continue;
311
86
}
312
/* Include all of the target-specific constraints. */
87
break;
313
88
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
314
#undef CONST
89
if (!arg_is_const(op->args[1])
315
#define CONST(CASE, MASK) \
90
&& arg_is_const(op->args[2])
316
- case CASE: def->args_ct[i].ct |= MASK; break;
91
&& arg_info(op->args[2])->val == -1) {
317
+ case CASE: args_ct[i].ct |= MASK; break;
92
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
318
#define REGS(CASE, MASK) \
93
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
319
- case CASE: def->args_ct[i].regs |= MASK; break;
94
continue;
320
+ case CASE: args_ct[i].regs |= MASK; break;
95
}
321
96
break;
322
#include "tcg-target-con-str.h"
97
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
323
98
324
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
99
if (partmask == 0) {
325
case '&':
100
tcg_debug_assert(nb_oargs == 1);
326
case 'p':
101
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
327
case 'm':
102
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
328
- /* Typo in TCGTargetOpDef constraint. */
103
continue;
329
+ /* Typo in TCGConstraintSet constraint. */
330
g_assert_not_reached();
331
}
332
} while (*++ct_str != '\0');
104
}
333
}
105
if (affected == 0) {
334
106
tcg_debug_assert(nb_oargs == 1);
335
- /* TCGTargetOpDef entry with too much information? */
107
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
336
- tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
108
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
337
-
109
continue;
338
/*
339
* Fix up output pairs that are aliased with inputs.
340
* When we created the alias, we copied pair from the output.
341
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
342
* first output to pair=3, and the pair_index'es to match.
343
*/
344
if (saw_alias_pair) {
345
- for (i = def->nb_oargs; i < nb_args; i++) {
346
+ for (int i = nb_oargs; i < nb_args; i++) {
347
+ int o, o2, i2;
348
+
349
/*
350
* Since [0-9pm] must be alone in the constraint string,
351
* the only way they can both be set is if the pair comes
352
* from the output alias.
353
*/
354
- if (!def->args_ct[i].ialias) {
355
+ if (!args_ct[i].ialias) {
356
continue;
357
}
358
- switch (def->args_ct[i].pair) {
359
+ switch (args_ct[i].pair) {
360
case 0:
361
break;
362
case 1:
363
- o = def->args_ct[i].alias_index;
364
- o2 = def->args_ct[o].pair_index;
365
- tcg_debug_assert(def->args_ct[o].pair == 1);
366
- tcg_debug_assert(def->args_ct[o2].pair == 2);
367
- if (def->args_ct[o2].oalias) {
368
+ o = args_ct[i].alias_index;
369
+ o2 = args_ct[o].pair_index;
370
+ tcg_debug_assert(args_ct[o].pair == 1);
371
+ tcg_debug_assert(args_ct[o2].pair == 2);
372
+ if (args_ct[o2].oalias) {
373
/* Case 1a */
374
- i2 = def->args_ct[o2].alias_index;
375
- tcg_debug_assert(def->args_ct[i2].pair == 2);
376
- def->args_ct[i2].pair_index = i;
377
- def->args_ct[i].pair_index = i2;
378
+ i2 = args_ct[o2].alias_index;
379
+ tcg_debug_assert(args_ct[i2].pair == 2);
380
+ args_ct[i2].pair_index = i;
381
+ args_ct[i].pair_index = i2;
382
} else {
383
/* Case 1b */
384
- def->args_ct[i].pair_index = i;
385
+ args_ct[i].pair_index = i;
386
}
387
break;
388
case 2:
389
- o = def->args_ct[i].alias_index;
390
- o2 = def->args_ct[o].pair_index;
391
- tcg_debug_assert(def->args_ct[o].pair == 2);
392
- tcg_debug_assert(def->args_ct[o2].pair == 1);
393
- if (def->args_ct[o2].oalias) {
394
+ o = args_ct[i].alias_index;
395
+ o2 = args_ct[o].pair_index;
396
+ tcg_debug_assert(args_ct[o].pair == 2);
397
+ tcg_debug_assert(args_ct[o2].pair == 1);
398
+ if (args_ct[o2].oalias) {
399
/* Case 1a */
400
- i2 = def->args_ct[o2].alias_index;
401
- tcg_debug_assert(def->args_ct[i2].pair == 1);
402
- def->args_ct[i2].pair_index = i;
403
- def->args_ct[i].pair_index = i2;
404
+ i2 = args_ct[o2].alias_index;
405
+ tcg_debug_assert(args_ct[i2].pair == 1);
406
+ args_ct[i2].pair_index = i;
407
+ args_ct[i].pair_index = i2;
408
} else {
409
/* Case 2 */
410
- def->args_ct[i].pair = 3;
411
- def->args_ct[o2].pair = 3;
412
- def->args_ct[i].pair_index = o2;
413
- def->args_ct[o2].pair_index = i;
414
+ args_ct[i].pair = 3;
415
+ args_ct[o2].pair = 3;
416
+ args_ct[i].pair_index = o2;
417
+ args_ct[o2].pair_index = i;
418
}
419
break;
420
default:
421
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
110
}
422
}
111
423
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
424
/* sort the constraints (XXX: this is just an heuristic) */
113
CASE_OP_32_64(mulsh):
425
- sort_constraints(def, 0, def->nb_oargs);
114
if (arg_is_const(op->args[2])
426
- sort_constraints(def, def->nb_oargs, def->nb_iargs);
115
&& arg_info(op->args[2])->val == 0) {
427
+ sort_constraints(args_ct, 0, nb_oargs);
116
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
428
+ sort_constraints(args_ct, nb_oargs, nb_iargs);
117
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
429
+ }
118
continue;
430
+
119
}
431
+ for (TCGOpcode op = 0; op < NB_OPS; op++) {
120
break;
432
+ TCGOpDef *def = &tcg_op_defs[op];
121
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
433
+ const TCGConstraintSet *tdefs;
122
CASE_OP_32_64_VEC(or):
434
+ TCGConstraintSetIndex con_set;
123
CASE_OP_32_64_VEC(and):
435
+ int nb_args;
124
if (args_are_copies(op->args[1], op->args[2])) {
436
+
125
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
437
+ nb_args = def->nb_iargs + def->nb_oargs;
126
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
438
+ if (nb_args == 0) {
127
continue;
439
+ continue;
128
}
440
+ }
129
break;
441
+
130
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
442
+ if (def->flags & TCG_OPF_NOT_PRESENT) {
131
CASE_OP_32_64_VEC(sub):
443
+ def->args_ct = empty_cts;
132
CASE_OP_32_64_VEC(xor):
444
+ continue;
133
if (args_are_copies(op->args[1], op->args[2])) {
445
+ }
134
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
446
+
135
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
447
+ /*
136
continue;
448
+ * Macro magic should make it impossible, but double-check that
137
}
449
+ * the array index is in range. At the same time, double-check
138
break;
450
+ * that the opcode is implemented, i.e. not C_NotImplemented.
139
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
451
+ */
140
allocator where needed and possible. Also detect copies. */
452
+ con_set = tcg_target_op_def(op);
141
switch (opc) {
453
+ tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
142
CASE_OP_32_64_VEC(mov):
454
+
143
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
455
+ /* The constraint arguments must match TCGOpcode arguments. */
144
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
456
+ tdefs = &constraint_sets[con_set];
145
continue;
457
+ tcg_debug_assert(tdefs->nb_oargs == def->nb_oargs);
146
458
+ tcg_debug_assert(tdefs->nb_iargs == def->nb_iargs);
147
case INDEX_op_dup_vec:
459
+
148
if (arg_is_const(op->args[1])) {
460
+ def->args_ct = all_cts[con_set];
149
tmp = arg_info(op->args[1])->val;
461
}
150
tmp = dup_const(TCGOP_VECE(op), tmp);
462
}
151
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
463
152
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
153
continue;
154
}
155
break;
156
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
157
case INDEX_op_dup2_vec:
158
assert(TCG_TARGET_REG_BITS == 32);
159
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
160
- tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
+ tcg_opt_gen_movi(&ctx, op, op->args[0],
162
deposit64(arg_info(op->args[1])->val, 32, 32,
163
arg_info(op->args[2])->val));
164
continue;
165
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
166
case INDEX_op_extrh_i64_i32:
167
if (arg_is_const(op->args[1])) {
168
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
169
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
171
continue;
172
}
173
break;
174
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
175
if (arg_is_const(op->args[1])) {
176
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
177
op->args[2]);
178
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
180
continue;
181
}
182
break;
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
185
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
186
arg_info(op->args[2])->val);
187
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
189
continue;
190
}
191
break;
192
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
193
TCGArg v = arg_info(op->args[1])->val;
194
if (v != 0) {
195
tmp = do_constant_folding(opc, v, 0);
196
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
198
} else {
199
- tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
200
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
201
}
202
continue;
203
}
204
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
205
tmp = deposit64(arg_info(op->args[1])->val,
206
op->args[3], op->args[4],
207
arg_info(op->args[2])->val);
208
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
209
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
210
continue;
211
}
212
break;
213
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
214
if (arg_is_const(op->args[1])) {
215
tmp = extract64(arg_info(op->args[1])->val,
216
op->args[2], op->args[3]);
217
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
218
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
219
continue;
220
}
221
break;
222
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
223
if (arg_is_const(op->args[1])) {
224
tmp = sextract64(arg_info(op->args[1])->val,
225
op->args[2], op->args[3]);
226
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
227
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
228
continue;
229
}
230
break;
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
tmp = (int32_t)(((uint32_t)v1 >> shr) |
233
((uint32_t)v2 << (32 - shr)));
234
}
235
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
236
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
237
continue;
238
}
239
break;
240
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
241
tmp = do_constant_folding_cond(opc, op->args[1],
242
op->args[2], op->args[3]);
243
if (tmp != 2) {
244
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
245
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
246
continue;
247
}
248
break;
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
tmp = do_constant_folding_cond(opc, op->args[1],
251
op->args[2], op->args[5]);
252
if (tmp != 2) {
253
- tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
254
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
255
continue;
256
}
257
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
258
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
259
260
rl = op->args[0];
261
rh = op->args[1];
262
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
263
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
264
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
265
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
266
continue;
267
}
268
break;
269
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
270
271
rl = op->args[0];
272
rh = op->args[1];
273
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
274
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
275
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
276
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
277
continue;
278
}
279
break;
280
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
281
op->args[5]);
282
if (tmp != 2) {
283
do_setcond_const:
284
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
285
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
286
continue;
287
}
288
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
289
--
2.25.1

Prepare for tracking different masks by renaming this one.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 142 +++++++++++++++++++++++++------------------------
 1 file changed, 72 insertions(+), 70 deletions(-)

Introduce a new function, opcode_args_ct, to look up the argument
set for an opcode. We lose the ability to assert the correctness
of the map from TCGOpcode to constraint sets at startup, but we can
still validate at runtime upon lookup.

Rename process_op_defs to process_constraint_sets, as it now does
nothing to TCGOpDef.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |  1 -
 tcg/tcg-common.c  |  2 +-
 tcg/tcg.c         | 82 ++++++++++++++++++++++-------------------
 3 files changed, 40 insertions(+), 45 deletions(-)
16
17
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
19
--- a/include/tcg/tcg.h
14
+++ b/tcg/optimize.c
20
+++ b/include/tcg/tcg.h
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
21
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
16
TCGTemp *prev_copy;
22
const char *name;
17
TCGTemp *next_copy;
23
uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
18
uint64_t val;
24
uint8_t flags;
19
- uint64_t mask;
25
- const TCGArgConstraint *args_ct;
20
+ uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
26
} TCGOpDef;
21
} TempOptInfo;
27
22
28
extern TCGOpDef tcg_op_defs[];
23
static inline TempOptInfo *ts_info(TCGTemp *ts)
29
diff --git a/tcg/tcg-common.c b/tcg/tcg-common.c
24
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
30
index XXXXXXX..XXXXXXX 100644
25
ti->next_copy = ts;
31
--- a/tcg/tcg-common.c
26
ti->prev_copy = ts;
32
+++ b/tcg/tcg-common.c
27
ti->is_const = false;
33
@@ -XXX,XX +XXX,XX @@
28
- ti->mask = -1;
34
29
+ ti->z_mask = -1;
35
TCGOpDef tcg_op_defs[] = {
36
#define DEF(s, oargs, iargs, cargs, flags) \
37
- { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags, NULL },
38
+ { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
39
#include "tcg/tcg-opc.h"
40
#undef DEF
41
};
42
diff --git a/tcg/tcg.c b/tcg/tcg.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/tcg.c
45
+++ b/tcg/tcg.c
46
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
30
}
47
}
31
48
32
static void reset_temp(TCGArg arg)
49
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
33
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
50
-static void process_op_defs(TCGContext *s);
34
if (ts->kind == TEMP_CONST) {
51
+static void process_constraint_sets(void);
35
ti->is_const = true;
52
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
36
ti->val = ts->val;
53
TCGReg reg, const char *name);
37
- ti->mask = ts->val;
54
38
+ ti->z_mask = ts->val;
55
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
39
if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
56
init_call_layout(&info_helper_st128_mmu);
40
/* High bits of a 32-bit quantity are garbage. */
57
41
- ti->mask |= ~0xffffffffull;
58
tcg_target_init(s);
42
+ ti->z_mask |= ~0xffffffffull;
59
- process_op_defs(s);
43
}
60
+ process_constraint_sets();
44
} else {
61
45
ti->is_const = false;
62
/* Reverse the order of the saved registers, assuming they're all at
46
- ti->mask = -1;
63
the start of tcg_target_reg_alloc_order. */
47
+ ti->z_mask = -1;
64
@@ -XXX,XX +XXX,XX @@ static void sort_constraints(TCGArgConstraint *a, int start, int n)
48
}
65
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
66
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
67
68
-static void process_op_defs(TCGContext *s)
69
+static void process_constraint_sets(void)
70
{
71
for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
72
const TCGConstraintSet *tdefs = &constraint_sets[c];
73
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
74
sort_constraints(args_ct, 0, nb_oargs);
75
sort_constraints(args_ct, nb_oargs, nb_iargs);
76
}
77
+}
78
79
- for (TCGOpcode op = 0; op < NB_OPS; op++) {
80
- TCGOpDef *def = &tcg_op_defs[op];
81
- const TCGConstraintSet *tdefs;
82
- TCGConstraintSetIndex con_set;
83
- int nb_args;
84
+static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
85
+{
86
+ TCGOpDef *def = &tcg_op_defs[op->opc];
87
+ TCGConstraintSetIndex con_set;
88
89
- nb_args = def->nb_iargs + def->nb_oargs;
90
- if (nb_args == 0) {
91
- continue;
92
- }
93
-
94
- if (def->flags & TCG_OPF_NOT_PRESENT) {
95
- def->args_ct = empty_cts;
96
- continue;
97
- }
98
-
99
- /*
100
- * Macro magic should make it impossible, but double-check that
101
- * the array index is in range. At the same time, double-check
102
- * that the opcode is implemented, i.e. not C_NotImplemented.
103
- */
104
- con_set = tcg_target_op_def(op);
105
- tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
106
-
107
- /* The constraint arguments must match TCGOpcode arguments. */
108
- tdefs = &constraint_sets[con_set];
109
- tcg_debug_assert(tdefs->nb_oargs == def->nb_oargs);
110
- tcg_debug_assert(tdefs->nb_iargs == def->nb_iargs);
111
-
112
- def->args_ct = all_cts[con_set];
113
+ if (def->nb_iargs + def->nb_oargs == 0) {
114
+ return NULL;
115
}
116
+ if (def->flags & TCG_OPF_NOT_PRESENT) {
117
+ return empty_cts;
118
+ }
119
+
120
+ con_set = tcg_target_op_def(op->opc);
121
+ tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
122
+
123
+ /* The constraint arguments must match TCGOpcode arguments. */
124
+ tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
125
+ tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
126
+
127
+ return all_cts[con_set];
49
}
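The shape of the lookup-time validation can be shown in isolation
(illustrative names and a trivial backend mapping, not the QEMU API):

#include <assert.h>

typedef struct { int nb_oargs, nb_iargs; } ConstraintSet;

enum { NUM_SETS = 2 };
static const ConstraintSet constraint_sets[NUM_SETS] = { { 1, 1 }, { 1, 2 } };

/* Stand-in for the backend's per-opcode mapping. */
static int target_op_def(int opc) { return opc == 0 ? 0 : 1; }

static const ConstraintSet *opcode_args_ct(int opc)
{
    int con_set = target_op_def(opc);

    /* The startup-time assertion moves here, to each lookup. */
    assert(con_set >= 0 && con_set < NUM_SETS);
    return &constraint_sets[con_set];
}

int main(void)
{
    return opcode_args_ct(1)->nb_iargs == 2 ? 0 : 1;
}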
128
}
50
129
51
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
130
static void remove_label_use(TCGOp *op, int idx)
52
const TCGOpDef *def;
131
@@ -XXX,XX +XXX,XX @@ liveness_pass_1(TCGContext *s)
53
TempOptInfo *di;
132
TCGTemp *ts;
54
TempOptInfo *si;
55
- uint64_t mask;
56
+ uint64_t z_mask;
57
TCGOpcode new_op;
58
59
if (ts_are_copies(dst_ts, src_ts)) {
60
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
61
op->args[0] = dst;
62
op->args[1] = src;
63
64
- mask = si->mask;
65
+ z_mask = si->z_mask;
66
if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
67
/* High bits of the destination are now garbage. */
68
- mask |= ~0xffffffffull;
69
+ z_mask |= ~0xffffffffull;
70
}
71
- di->mask = mask;
72
+ di->z_mask = z_mask;
73
74
if (src_ts->type == dst_ts->type) {
75
TempOptInfo *ni = ts_info(si->next_copy);
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
}
78
79
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
80
- uint64_t mask, partmask, affected, tmp;
81
+ uint64_t z_mask, partmask, affected, tmp;
82
int nb_oargs, nb_iargs;
83
TCGOpcode opc = op->opc;
133
TCGOpcode opc = op->opc;
84
const TCGOpDef *def = &tcg_op_defs[opc];
134
const TCGOpDef *def = &tcg_op_defs[opc];
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
135
+ const TCGArgConstraint *args_ct;
86
136
87
/* Simplify using known-zero bits. Currently only ops with a single
88
output argument is supported. */
89
- mask = -1;
90
+ z_mask = -1;
91
affected = -1;
92
switch (opc) {
137
switch (opc) {
93
CASE_OP_32_64(ext8s):
138
case INDEX_op_call:
94
- if ((arg_info(op->args[1])->mask & 0x80) != 0) {
139
@@ -XXX,XX +XXX,XX @@ liveness_pass_1(TCGContext *s)
95
+ if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
96
break;
140
break;
97
}
141
98
QEMU_FALLTHROUGH;
142
default:
99
CASE_OP_32_64(ext8u):
143
+ args_ct = opcode_args_ct(op);
100
- mask = 0xff;
144
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
101
+ z_mask = 0xff;
145
- const TCGArgConstraint *ct = &def->args_ct[i];
102
goto and_const;
146
+ const TCGArgConstraint *ct = &args_ct[i];
103
CASE_OP_32_64(ext16s):
147
TCGRegSet set, *pset;
104
- if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
148
105
+ if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
149
ts = arg_temp(op->args[i]);
106
break;
150
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
107
}
151
{
108
QEMU_FALLTHROUGH;
152
const TCGLifeData arg_life = op->life;
109
CASE_OP_32_64(ext16u):
153
TCGRegSet dup_out_regs, dup_in_regs;
110
- mask = 0xffff;
154
+ const TCGArgConstraint *dup_args_ct;
111
+ z_mask = 0xffff;
155
TCGTemp *its, *ots;
112
goto and_const;
156
TCGType itype, vtype;
113
case INDEX_op_ext32s_i64:
157
unsigned vece;
114
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
158
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
115
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
159
return;
116
break;
160
}
117
}
161
118
QEMU_FALLTHROUGH;
162
- dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
119
case INDEX_op_ext32u_i64:
163
- dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
120
- mask = 0xffffffffU;
164
+ dup_args_ct = opcode_args_ct(op);
121
+ z_mask = 0xffffffffU;
165
+ dup_out_regs = dup_args_ct[0].regs;
122
goto and_const;
166
+ dup_in_regs = dup_args_ct[1].regs;
123
167
124
CASE_OP_32_64(and):
168
/* Allocate the output register now. */
125
- mask = arg_info(op->args[2])->mask;
169
if (ots->val_type != TEMP_VAL_REG) {
126
+ z_mask = arg_info(op->args[2])->z_mask;
170
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
127
if (arg_is_const(op->args[2])) {
171
int i, k, nb_iargs, nb_oargs;
128
and_const:
172
TCGReg reg;
129
- affected = arg_info(op->args[1])->mask & ~mask;
173
TCGArg arg;
130
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
174
+ const TCGArgConstraint *args_ct;
131
}
175
const TCGArgConstraint *arg_ct;
132
- mask = arg_info(op->args[1])->mask & mask;
176
TCGTemp *ts;
133
+ z_mask = arg_info(op->args[1])->z_mask & z_mask;
177
TCGArg new_args[TCG_MAX_OP_ARGS];
134
break;
178
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
135
179
break;
136
case INDEX_op_ext_i32_i64:
180
}
137
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
181
138
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
182
+ args_ct = opcode_args_ct(op);
139
break;
183
+
140
}
184
/* satisfy input constraints */
141
QEMU_FALLTHROUGH;
185
for (k = 0; k < nb_iargs; k++) {
142
case INDEX_op_extu_i32_i64:
186
TCGRegSet i_preferred_regs, i_required_regs;
143
/* We do not compute affected as it is a size changing op. */
187
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
144
- mask = (uint32_t)arg_info(op->args[1])->mask;
188
TCGTemp *ts2;
145
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
189
int i1, i2;
146
break;
190
147
191
- i = def->args_ct[nb_oargs + k].sort_index;
148
CASE_OP_32_64(andc):
192
+ i = args_ct[nb_oargs + k].sort_index;
149
/* Known-zeros does not imply known-ones. Therefore unless
193
arg = op->args[i];
150
op->args[2] is constant, we can't infer anything from it. */
194
- arg_ct = &def->args_ct[i];
151
if (arg_is_const(op->args[2])) {
195
+ arg_ct = &args_ct[i];
152
- mask = ~arg_info(op->args[2])->mask;
196
ts = arg_temp(arg);
153
+ z_mask = ~arg_info(op->args[2])->z_mask;
197
154
goto and_const;
198
if (ts->val_type == TEMP_VAL_CONST
155
}
199
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
156
/* But we certainly know nothing outside args[1] may be set. */
200
* register and move it.
157
- mask = arg_info(op->args[1])->mask;
201
*/
158
+ z_mask = arg_info(op->args[1])->z_mask;
202
if (temp_readonly(ts) || !IS_DEAD_ARG(i)
159
break;
203
- || def->args_ct[arg_ct->alias_index].newreg) {
160
204
+ || args_ct[arg_ct->alias_index].newreg) {
161
case INDEX_op_sar_i32:
205
allocate_new_reg = true;
162
if (arg_is_const(op->args[2])) {
206
} else if (ts->val_type == TEMP_VAL_REG) {
163
tmp = arg_info(op->args[2])->val & 31;
207
/*
164
- mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
208
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
165
+ z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
166
}
167
break;
168
case INDEX_op_sar_i64:
169
if (arg_is_const(op->args[2])) {
170
tmp = arg_info(op->args[2])->val & 63;
171
- mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
172
+ z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
173
}
174
break;
175
176
case INDEX_op_shr_i32:
177
if (arg_is_const(op->args[2])) {
178
tmp = arg_info(op->args[2])->val & 31;
179
- mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
180
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
181
}
182
break;
183
case INDEX_op_shr_i64:
184
if (arg_is_const(op->args[2])) {
185
tmp = arg_info(op->args[2])->val & 63;
186
- mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
187
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
188
}
189
break;
190
191
case INDEX_op_extrl_i64_i32:
192
- mask = (uint32_t)arg_info(op->args[1])->mask;
193
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
194
break;
195
case INDEX_op_extrh_i64_i32:
196
- mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
197
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
198
break;
199
200
CASE_OP_32_64(shl):
201
if (arg_is_const(op->args[2])) {
202
tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
203
- mask = arg_info(op->args[1])->mask << tmp;
204
+ z_mask = arg_info(op->args[1])->z_mask << tmp;
205
}
206
break;
207
208
CASE_OP_32_64(neg):
209
/* Set to 1 all bits to the left of the rightmost. */
210
- mask = -(arg_info(op->args[1])->mask
211
- & -arg_info(op->args[1])->mask);
212
+ z_mask = -(arg_info(op->args[1])->z_mask
213
+ & -arg_info(op->args[1])->z_mask);
214
break;
215
216
CASE_OP_32_64(deposit):
217
- mask = deposit64(arg_info(op->args[1])->mask,
218
- op->args[3], op->args[4],
219
- arg_info(op->args[2])->mask);
220
+ z_mask = deposit64(arg_info(op->args[1])->z_mask,
221
+ op->args[3], op->args[4],
222
+ arg_info(op->args[2])->z_mask);
223
break;
224
225
CASE_OP_32_64(extract):
226
- mask = extract64(arg_info(op->args[1])->mask,
227
- op->args[2], op->args[3]);
228
+ z_mask = extract64(arg_info(op->args[1])->z_mask,
229
+ op->args[2], op->args[3]);
230
if (op->args[2] == 0) {
231
- affected = arg_info(op->args[1])->mask & ~mask;
232
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
233
}
234
break;
235
CASE_OP_32_64(sextract):
236
- mask = sextract64(arg_info(op->args[1])->mask,
237
- op->args[2], op->args[3]);
238
- if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
239
- affected = arg_info(op->args[1])->mask & ~mask;
240
+ z_mask = sextract64(arg_info(op->args[1])->z_mask,
241
+ op->args[2], op->args[3]);
242
+ if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
243
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
244
}
245
break;
246
247
CASE_OP_32_64(or):
248
CASE_OP_32_64(xor):
249
- mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
250
+ z_mask = arg_info(op->args[1])->z_mask
251
+ | arg_info(op->args[2])->z_mask;
252
break;
253
254
case INDEX_op_clz_i32:
255
case INDEX_op_ctz_i32:
256
- mask = arg_info(op->args[2])->mask | 31;
257
+ z_mask = arg_info(op->args[2])->z_mask | 31;
258
break;
259
260
case INDEX_op_clz_i64:
261
case INDEX_op_ctz_i64:
262
- mask = arg_info(op->args[2])->mask | 63;
263
+ z_mask = arg_info(op->args[2])->z_mask | 63;
264
break;
265
266
case INDEX_op_ctpop_i32:
267
- mask = 32 | 31;
268
+ z_mask = 32 | 31;
269
break;
270
case INDEX_op_ctpop_i64:
271
- mask = 64 | 63;
272
+ z_mask = 64 | 63;
273
break;
274
275
CASE_OP_32_64(setcond):
276
case INDEX_op_setcond2_i32:
277
- mask = 1;
278
+ z_mask = 1;
279
break;
280
281
CASE_OP_32_64(movcond):
282
- mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
283
+ z_mask = arg_info(op->args[3])->z_mask
284
+ | arg_info(op->args[4])->z_mask;
285
break;
286
287
CASE_OP_32_64(ld8u):
288
- mask = 0xff;
289
+ z_mask = 0xff;
290
break;
291
CASE_OP_32_64(ld16u):
292
- mask = 0xffff;
293
+ z_mask = 0xffff;
294
break;
295
case INDEX_op_ld32u_i64:
296
- mask = 0xffffffffu;
297
+ z_mask = 0xffffffffu;
298
break;
299
300
CASE_OP_32_64(qemu_ld):
301
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
302
MemOpIdx oi = op->args[nb_oargs + nb_iargs];
303
MemOp mop = get_memop(oi);
304
if (!(mop & MO_SIGN)) {
305
- mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
306
+ z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
307
}
308
}
309
break;
310
311
CASE_OP_32_64(bswap16):
312
- mask = arg_info(op->args[1])->mask;
313
- if (mask <= 0xffff) {
314
+ z_mask = arg_info(op->args[1])->z_mask;
315
+ if (z_mask <= 0xffff) {
316
op->args[2] |= TCG_BSWAP_IZ;
317
}
318
- mask = bswap16(mask);
319
+ z_mask = bswap16(z_mask);
320
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
321
case TCG_BSWAP_OZ:
322
break;
323
case TCG_BSWAP_OS:
324
- mask = (int16_t)mask;
325
+ z_mask = (int16_t)z_mask;
326
break;
327
default: /* undefined high bits */
328
- mask |= MAKE_64BIT_MASK(16, 48);
329
+ z_mask |= MAKE_64BIT_MASK(16, 48);
330
break;
331
}
332
break;
333
334
case INDEX_op_bswap32_i64:
335
- mask = arg_info(op->args[1])->mask;
336
- if (mask <= 0xffffffffu) {
337
+ z_mask = arg_info(op->args[1])->z_mask;
338
+ if (z_mask <= 0xffffffffu) {
339
op->args[2] |= TCG_BSWAP_IZ;
340
}
341
- mask = bswap32(mask);
342
+ z_mask = bswap32(z_mask);
343
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
344
case TCG_BSWAP_OZ:
345
break;
346
case TCG_BSWAP_OS:
347
- mask = (int32_t)mask;
348
+ z_mask = (int32_t)z_mask;
349
break;
350
default: /* undefined high bits */
351
- mask |= MAKE_64BIT_MASK(32, 32);
352
+ z_mask |= MAKE_64BIT_MASK(32, 32);
353
break;
354
}
355
break;
356
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
357
/* 32-bit ops generate 32-bit results. For the result is zero test
358
below, we can ignore high bits, but for further optimizations we
359
need to record that the high bits contain garbage. */
360
- partmask = mask;
361
+ partmask = z_mask;
362
if (!(def->flags & TCG_OPF_64BIT)) {
363
- mask |= ~(tcg_target_ulong)0xffffffffu;
364
+ z_mask |= ~(tcg_target_ulong)0xffffffffu;
365
partmask &= 0xffffffffu;
366
affected &= 0xffffffffu;
367
}
209
}
368
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
369
vs the high word of the input. */
211
/* satisfy the output constraints */
370
do_setcond_high:
212
- for(k = 0; k < nb_oargs; k++) {
371
reset_temp(op->args[0]);
213
- i = def->args_ct[k].sort_index;
372
- arg_info(op->args[0])->mask = 1;
214
+ for (k = 0; k < nb_oargs; k++) {
373
+ arg_info(op->args[0])->z_mask = 1;
215
+ i = args_ct[k].sort_index;
374
op->opc = INDEX_op_setcond_i32;
216
arg = op->args[i];
375
op->args[1] = op->args[2];
217
- arg_ct = &def->args_ct[i];
376
op->args[2] = op->args[4];
218
+ arg_ct = &args_ct[i];
377
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
219
ts = arg_temp(arg);
378
}
220
379
do_setcond_low:
221
/* ENV should not be modified. */
380
reset_temp(op->args[0]);
222
@@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
381
- arg_info(op->args[0])->mask = 1;
223
/* Allocate the output register now. */
382
+ arg_info(op->args[0])->z_mask = 1;
224
if (ots->val_type != TEMP_VAL_REG) {
383
op->opc = INDEX_op_setcond_i32;
225
TCGRegSet allocated_regs = s->reserved_regs;
384
op->args[2] = op->args[3];
226
- TCGRegSet dup_out_regs =
385
op->args[3] = op->args[5];
227
- tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
386
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
228
+ TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
387
/* Default case: we know nothing about operation (or were unable
229
TCGReg oreg;
388
to compute the operation result) so no propagation is done.
230
389
We trash everything if the operation is the end of a basic
231
/* Make sure to not spill the input registers. */
390
- block, otherwise we only trash the output args. "mask" is
391
+ block, otherwise we only trash the output args. "z_mask" is
392
the non-zero bits mask for the first output arg. */
393
if (def->flags & TCG_OPF_BB_END) {
394
memset(&temps_used, 0, sizeof(temps_used));
395
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
396
/* Save the corresponding known-zero bits mask for the
397
first output argument (only one supported so far). */
398
if (i == 0) {
399
- arg_info(op->args[i])->mask = mask;
400
+ arg_info(op->args[i])->z_mask = z_mask;
401
}
402
}
403
}
404
--
2.25.1

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

Now that we're no longer assigning to TCGOpDef.args_ct,
we can make the array constant.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 2 +-
 tcg/tcg-common.c  | 2 +-
 tcg/tcg.c         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
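The point of the change can be seen in a minimal sketch (illustrative
types, not the TCG definitions): once nothing writes to the table at
run time, const-qualifying it turns any leftover store into a compile
error.

typedef struct { const char *name; int nb_args; } OpDef;

static const OpDef op_defs[] = {
    { "mov", 2 },
    { "add", 3 },
};

int main(void)
{
    /* op_defs[0].nb_args = 0;  <- would now fail to compile */
    return op_defs[1].nb_args == 3 ? 0 : 1;
}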
7
11
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
9
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
14
--- a/include/tcg/tcg.h
11
+++ b/tcg/optimize.c
15
+++ b/include/tcg/tcg.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
13
return fold_const2(ctx, op);
17
uint8_t flags;
14
}
18
} TCGOpDef;
15
19
16
+static bool fold_extract2(OptContext *ctx, TCGOp *op)
20
-extern TCGOpDef tcg_op_defs[];
17
+{
21
+extern const TCGOpDef tcg_op_defs[];
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
22
extern const size_t tcg_op_defs_max;
19
+ uint64_t v1 = arg_info(op->args[1])->val;
23
20
+ uint64_t v2 = arg_info(op->args[2])->val;
24
/*
21
+ int shr = op->args[3];
25
diff --git a/tcg/tcg-common.c b/tcg/tcg-common.c
22
+
26
index XXXXXXX..XXXXXXX 100644
23
+ if (op->opc == INDEX_op_extract2_i64) {
27
--- a/tcg/tcg-common.c
24
+ v1 >>= shr;
28
+++ b/tcg/tcg-common.c
25
+ v2 <<= 64 - shr;
29
@@ -XXX,XX +XXX,XX @@
26
+ } else {
30
#include "tcg/tcg.h"
27
+ v1 = (uint32_t)v1 >> shr;
31
#include "tcg-has.h"
28
+ v2 = (int32_t)v2 << (32 - shr);
32
29
+ }
33
-TCGOpDef tcg_op_defs[] = {
30
+ return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
34
+const TCGOpDef tcg_op_defs[] = {
31
+ }
35
#define DEF(s, oargs, iargs, cargs, flags) \
32
+ return false;
36
{ #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
33
+}
37
#include "tcg/tcg-opc.h"
34
+
38
diff --git a/tcg/tcg.c b/tcg/tcg.c
35
static bool fold_exts(OptContext *ctx, TCGOp *op)
39
index XXXXXXX..XXXXXXX 100644
40
--- a/tcg/tcg.c
41
+++ b/tcg/tcg.c
42
@@ -XXX,XX +XXX,XX @@ static void process_constraint_sets(void)
43
44
static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
36
{
45
{
37
return fold_const1(ctx, op);
46
- TCGOpDef *def = &tcg_op_defs[op->opc];
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
47
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
39
}
48
TCGConstraintSetIndex con_set;
40
break;
49
41
50
if (def->nb_iargs + def->nb_oargs == 0) {
42
- CASE_OP_32_64(extract2):
43
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
44
- uint64_t v1 = arg_info(op->args[1])->val;
45
- uint64_t v2 = arg_info(op->args[2])->val;
46
- int shr = op->args[3];
47
-
48
- if (opc == INDEX_op_extract2_i64) {
49
- tmp = (v1 >> shr) | (v2 << (64 - shr));
50
- } else {
51
- tmp = (int32_t)(((uint32_t)v1 >> shr) |
52
- ((uint32_t)v2 << (32 - shr)));
53
- }
54
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
55
- continue;
56
- }
57
- break;
58
-
59
default:
60
break;
61
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
63
CASE_OP_32_64(eqv):
64
done = fold_eqv(&ctx, op);
65
break;
66
+ CASE_OP_32_64(extract2):
67
+ done = fold_extract2(&ctx, op);
68
+ break;
69
CASE_OP_32_64(ext8s):
70
CASE_OP_32_64(ext16s):
71
case INDEX_op_ext32s_i64:
72
--
2.25.1

Add two additional helpers, fold_add2_i32 and fold_sub2_i32
which will not be simple wrappers forever.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 70 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 26 deletions(-)

We should have checked that the op is supported before
emitting it. The backend cannot be expected to have a
constraint set for unsupported ops.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 4 ++++
 1 file changed, 4 insertions(+)
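The double-word fold added by the first patch boils down to widening,
one 64-bit add or subtract, and splitting; a standalone rendering in
plain C (not the QEMU helper itself):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void addsub2_i32(uint32_t *rl, uint32_t *rh,
                        uint32_t al, uint32_t ah,
                        uint32_t bl, uint32_t bh, int add)
{
    uint64_t a = ((uint64_t)ah << 32) | al;
    uint64_t b = ((uint64_t)bh << 32) | bl;

    a = add ? a + b : a - b;
    *rl = (uint32_t)a;
    *rh = (uint32_t)(a >> 32);
}

int main(void)
{
    uint32_t lo, hi;

    /* 0x00000001_FFFFFFFF + 1 carries into the high half. */
    addsub2_i32(&lo, &hi, 0xFFFFFFFFu, 1, 1, 0, 1);
    printf("hi=%" PRIu32 " lo=%" PRIu32 "\n", hi, lo); /* hi=2 lo=0 */
    return 0;
}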
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/tcg.c b/tcg/tcg.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/tcg.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/tcg.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
16
return fold_const2(ctx, op);
16
const TCGOpDef *def = &tcg_op_defs[op->opc];
17
}
17
TCGConstraintSetIndex con_set;
18
18
19
+static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
19
+#ifdef CONFIG_DEBUG_TCG
20
+{
20
+ assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)));
21
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
21
+#endif
22
+ arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
23
+ uint32_t al = arg_info(op->args[2])->val;
24
+ uint32_t ah = arg_info(op->args[3])->val;
25
+ uint32_t bl = arg_info(op->args[4])->val;
26
+ uint32_t bh = arg_info(op->args[5])->val;
27
+ uint64_t a = ((uint64_t)ah << 32) | al;
28
+ uint64_t b = ((uint64_t)bh << 32) | bl;
29
+ TCGArg rl, rh;
30
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
31
+
22
+
32
+ if (add) {
23
if (def->nb_iargs + def->nb_oargs == 0) {
33
+ a += b;
24
return NULL;
34
+ } else {
25
}
35
+ a -= b;
36
+ }
37
+
38
+ rl = op->args[0];
39
+ rh = op->args[1];
40
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
41
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
42
+ return true;
43
+ }
44
+ return false;
45
+}
46
+
47
+static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
48
+{
49
+ return fold_addsub2_i32(ctx, op, true);
50
+}
51
+
52
static bool fold_and(OptContext *ctx, TCGOp *op)
53
{
54
return fold_const2(ctx, op);
55
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
56
return fold_const2(ctx, op);
57
}
58
59
+static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
60
+{
61
+ return fold_addsub2_i32(ctx, op, false);
62
+}
63
+
64
static bool fold_xor(OptContext *ctx, TCGOp *op)
65
{
66
return fold_const2(ctx, op);
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
}
69
break;
70
71
- case INDEX_op_add2_i32:
72
- case INDEX_op_sub2_i32:
73
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
74
- && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
75
- uint32_t al = arg_info(op->args[2])->val;
76
- uint32_t ah = arg_info(op->args[3])->val;
77
- uint32_t bl = arg_info(op->args[4])->val;
78
- uint32_t bh = arg_info(op->args[5])->val;
79
- uint64_t a = ((uint64_t)ah << 32) | al;
80
- uint64_t b = ((uint64_t)bh << 32) | bl;
81
- TCGArg rl, rh;
82
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
83
-
84
- if (opc == INDEX_op_add2_i32) {
85
- a += b;
86
- } else {
87
- a -= b;
88
- }
89
-
90
- rl = op->args[0];
91
- rh = op->args[1];
92
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
93
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
94
- continue;
95
- }
96
- break;
97
98
default:
99
break;
100
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
101
CASE_OP_32_64_VEC(add):
102
done = fold_add(&ctx, op);
103
break;
104
+ case INDEX_op_add2_i32:
105
+ done = fold_add2_i32(&ctx, op);
106
+ break;
107
CASE_OP_32_64_VEC(and):
108
done = fold_and(&ctx, op);
109
break;
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64_VEC(sub):
112
done = fold_sub(&ctx, op);
113
break;
114
+ case INDEX_op_sub2_i32:
115
+ done = fold_sub2_i32(&ctx, op);
116
+ break;
117
CASE_OP_32_64_VEC(xor):
118
done = fold_xor(&ctx, op);
119
break;
120
--
26
--
121
2.25.1
27
2.43.0
122
28
123
29
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
The br, mb, goto_tb and exit_tb opcodes do not have
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
register operands, only constants, flags, or labels.
3
Remove the special case in opcode_args_ct by including
4
TCG_OPF_NOT_PRESENT in the flags for these opcodes.
5
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
8
---
5
tcg/optimize.c | 37 +++++++++++++++++++++----------------
9
include/tcg/tcg-opc.h | 8 ++++----
6
1 file changed, 21 insertions(+), 16 deletions(-)
10
tcg/tcg.c | 3 ---
11
2 files changed, 4 insertions(+), 7 deletions(-)
7
12
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
9
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
15
--- a/include/tcg/tcg-opc.h
11
+++ b/tcg/optimize.c
16
+++ b/include/tcg/tcg-opc.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
17
@@ -XXX,XX +XXX,XX @@ DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
13
return fold_const2(ctx, op);
18
/* variable number of parameters */
14
}
19
DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
15
20
16
+static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
21
-DEF(br, 0, 0, 1, TCG_OPF_BB_END)
17
+{
22
+DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
18
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
23
19
+ uint32_t a = arg_info(op->args[2])->val;
24
#define IMPL(X) (__builtin_constant_p(X) && (X) <= 0 ? TCG_OPF_NOT_PRESENT : 0)
20
+ uint32_t b = arg_info(op->args[3])->val;
25
#if TCG_TARGET_REG_BITS == 32
21
+ uint64_t r = (uint64_t)a * b;
26
@@ -XXX,XX +XXX,XX @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END)
22
+ TCGArg rl, rh;
27
# define IMPL64 TCG_OPF_64BIT
23
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
28
#endif
24
+
29
25
+ rl = op->args[0];
30
-DEF(mb, 0, 0, 1, 0)
26
+ rh = op->args[1];
31
+DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
27
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
32
28
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
33
DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
29
+ return true;
34
DEF(setcond_i32, 1, 2, 1, 0)
30
+ }
35
@@ -XXX,XX +XXX,XX @@ DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
31
+ return false;
36
/* There are tcg_ctx->insn_start_words here, not just one. */
32
+}
37
DEF(insn_start, 0, 0, DATA64_ARGS, TCG_OPF_NOT_PRESENT)
33
+
38
34
static bool fold_nand(OptContext *ctx, TCGOp *op)
39
-DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
35
{
40
-DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
36
return fold_const2(ctx, op);
41
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
37
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
42
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
38
}
43
DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
39
break;
44
40
45
DEF(plugin_cb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
41
- case INDEX_op_mulu2_i32:
46
diff --git a/tcg/tcg.c b/tcg/tcg.c
42
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
47
index XXXXXXX..XXXXXXX 100644
43
- uint32_t a = arg_info(op->args[2])->val;
48
--- a/tcg/tcg.c
44
- uint32_t b = arg_info(op->args[3])->val;
49
+++ b/tcg/tcg.c
45
- uint64_t r = (uint64_t)a * b;
50
@@ -XXX,XX +XXX,XX @@ static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
46
- TCGArg rl, rh;
51
assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)));
47
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
52
#endif
48
-
53
49
- rl = op->args[0];
54
- if (def->nb_iargs + def->nb_oargs == 0) {
50
- rh = op->args[1];
55
- return NULL;
51
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
56
- }
52
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
57
if (def->flags & TCG_OPF_NOT_PRESENT) {
53
- continue;
58
return empty_cts;
54
- }
59
}
55
- break;
56
-
57
default:
58
break;
59
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
CASE_OP_32_64(muluh):
62
done = fold_mul_highpart(&ctx, op);
63
break;
64
+ case INDEX_op_mulu2_i32:
65
+ done = fold_mulu2_i32(&ctx, op);
66
+ break;
67
CASE_OP_32_64(nand):
68
done = fold_nand(&ctx, op);
69
break;
70
--
60
--
71
2.25.1
61
2.43.0
72
62
73
63
diff view generated by jsdifflib
1
Split out the conditional conversion from a more complex logical
1
Allow the backend to make constraint choices based on more parameters.
2
operation to a simple NOT. Create a couple more helpers to make
3
this easy for the outer-most logical operations.
4
2
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/optimize.c | 158 +++++++++++++++++++++++++++----------------------
6
tcg/tcg.c | 4 ++--
9
1 file changed, 86 insertions(+), 72 deletions(-)
7
tcg/aarch64/tcg-target.c.inc | 3 ++-
8
tcg/arm/tcg-target.c.inc | 3 ++-
9
tcg/i386/tcg-target.c.inc | 3 ++-
10
tcg/loongarch64/tcg-target.c.inc | 3 ++-
11
tcg/mips/tcg-target.c.inc | 3 ++-
12
tcg/ppc/tcg-target.c.inc | 3 ++-
13
tcg/riscv/tcg-target.c.inc | 3 ++-
14
tcg/s390x/tcg-target.c.inc | 3 ++-
15
tcg/sparc64/tcg-target.c.inc | 3 ++-
16
tcg/tci/tcg-target.c.inc | 3 ++-
17
11 files changed, 22 insertions(+), 12 deletions(-)
10
18
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
19
diff --git a/tcg/tcg.c b/tcg/tcg.c
12
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
21
--- a/tcg/tcg.c
14
+++ b/tcg/optimize.c
22
+++ b/tcg/tcg.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
23
@@ -XXX,XX +XXX,XX @@ typedef enum {
16
return false;
24
#include "tcg-target-con-set.h"
25
} TCGConstraintSetIndex;
26
27
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
28
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
29
30
#undef C_O0_I1
31
#undef C_O0_I2
32
@@ -XXX,XX +XXX,XX @@ static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
33
return empty_cts;
34
}
35
36
- con_set = tcg_target_op_def(op->opc);
37
+ con_set = tcg_target_op_def(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op));
38
tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
39
40
/* The constraint arguments must match TCGOpcode arguments. */
41
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
42
index XXXXXXX..XXXXXXX 100644
43
--- a/tcg/aarch64/tcg-target.c.inc
44
+++ b/tcg/aarch64/tcg-target.c.inc
45
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
46
}
17
}
47
}
18
48
19
+/*
49
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
20
+ * Convert @op to NOT, if NOT is supported by the host.
50
+static TCGConstraintSetIndex
21
+ * Return true f the conversion is successful, which will still
51
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
22
+ * indicate that the processing is complete.
23
+ */
24
+static bool fold_not(OptContext *ctx, TCGOp *op);
25
+static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
26
+{
27
+ TCGOpcode not_op;
28
+ bool have_not;
29
+
30
+ switch (ctx->type) {
31
+ case TCG_TYPE_I32:
32
+ not_op = INDEX_op_not_i32;
33
+ have_not = TCG_TARGET_HAS_not_i32;
34
+ break;
35
+ case TCG_TYPE_I64:
36
+ not_op = INDEX_op_not_i64;
37
+ have_not = TCG_TARGET_HAS_not_i64;
38
+ break;
39
+ case TCG_TYPE_V64:
40
+ case TCG_TYPE_V128:
41
+ case TCG_TYPE_V256:
42
+ not_op = INDEX_op_not_vec;
43
+ have_not = TCG_TARGET_HAS_not_vec;
44
+ break;
45
+ default:
46
+ g_assert_not_reached();
47
+ }
48
+ if (have_not) {
49
+ op->opc = not_op;
50
+ op->args[1] = op->args[idx];
51
+ return fold_not(ctx, op);
52
+ }
53
+ return false;
54
+}
55
+
56
+/* If the binary operation has first argument @i, fold to NOT. */
57
+static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
58
+{
59
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
60
+ return fold_to_not(ctx, op, 2);
61
+ }
62
+ return false;
63
+}
64
+
65
/* If the binary operation has second argument @i, fold to @i. */
66
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
67
{
52
{
68
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
53
switch (op) {
69
return false;
54
case INDEX_op_goto_ptr:
55
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
56
index XXXXXXX..XXXXXXX 100644
57
--- a/tcg/arm/tcg-target.c.inc
58
+++ b/tcg/arm/tcg-target.c.inc
59
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
60
}
70
}
61
}
71
62
72
+/* If the binary operation has second argument @i, fold to NOT. */
63
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
73
+static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
64
+static TCGConstraintSetIndex
74
+{
65
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
75
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
76
+ return fold_to_not(ctx, op, 1);
77
+ }
78
+ return false;
79
+}
80
+
81
/* If the binary operation has both arguments equal, fold to @i. */
82
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
83
{
66
{
84
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
67
switch (op) {
85
static bool fold_andc(OptContext *ctx, TCGOp *op)
68
case INDEX_op_goto_ptr:
69
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
70
index XXXXXXX..XXXXXXX 100644
71
--- a/tcg/i386/tcg-target.c.inc
72
+++ b/tcg/i386/tcg-target.c.inc
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
74
}
75
}
76
77
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
78
+static TCGConstraintSetIndex
79
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
86
{
80
{
87
if (fold_const2(ctx, op) ||
81
switch (op) {
88
- fold_xx_to_i(ctx, op, 0)) {
82
case INDEX_op_goto_ptr:
89
+ fold_xx_to_i(ctx, op, 0) ||
83
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
90
+ fold_ix_to_not(ctx, op, -1)) {
84
index XXXXXXX..XXXXXXX 100644
91
return true;
85
--- a/tcg/loongarch64/tcg-target.c.inc
86
+++ b/tcg/loongarch64/tcg-target.c.inc
87
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
88
g_assert_not_reached();
89
}
90
91
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
92
+static TCGConstraintSetIndex
93
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
94
{
95
switch (op) {
96
case INDEX_op_goto_ptr:
97
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
98
index XXXXXXX..XXXXXXX 100644
99
--- a/tcg/mips/tcg-target.c.inc
100
+++ b/tcg/mips/tcg-target.c.inc
101
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
92
}
102
}
93
return false;
103
}
94
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
104
95
105
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
96
static bool fold_eqv(OptContext *ctx, TCGOp *op)
106
+static TCGConstraintSetIndex
107
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
97
{
108
{
98
- return fold_const2(ctx, op);
109
switch (op) {
99
+ if (fold_const2(ctx, op) ||
110
case INDEX_op_goto_ptr:
100
+ fold_xi_to_not(ctx, op, 0)) {
111
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
101
+ return true;
112
index XXXXXXX..XXXXXXX 100644
102
+ }
113
--- a/tcg/ppc/tcg-target.c.inc
103
+ return false;
114
+++ b/tcg/ppc/tcg-target.c.inc
115
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
116
va_end(va);
104
}
117
}
105
118
106
static bool fold_extract(OptContext *ctx, TCGOp *op)
119
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
107
@@ -XXX,XX +XXX,XX @@ static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
120
+static TCGConstraintSetIndex
108
121
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
109
static bool fold_nand(OptContext *ctx, TCGOp *op)
110
{
122
{
111
- return fold_const2(ctx, op);
123
switch (op) {
112
+ if (fold_const2(ctx, op) ||
124
case INDEX_op_goto_ptr:
113
+ fold_xi_to_not(ctx, op, -1)) {
125
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
114
+ return true;
126
index XXXXXXX..XXXXXXX 100644
115
+ }
127
--- a/tcg/riscv/tcg-target.c.inc
116
+ return false;
128
+++ b/tcg/riscv/tcg-target.c.inc
129
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
130
}
117
}
131
}
118
132
119
static bool fold_neg(OptContext *ctx, TCGOp *op)
133
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
120
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
134
+static TCGConstraintSetIndex
121
135
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
122
static bool fold_nor(OptContext *ctx, TCGOp *op)
123
{
136
{
124
- return fold_const2(ctx, op);
137
switch (op) {
125
+ if (fold_const2(ctx, op) ||
138
case INDEX_op_goto_ptr:
126
+ fold_xi_to_not(ctx, op, 0)) {
139
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
127
+ return true;
140
index XXXXXXX..XXXXXXX 100644
128
+ }
141
--- a/tcg/s390x/tcg-target.c.inc
129
+ return false;
142
+++ b/tcg/s390x/tcg-target.c.inc
143
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
144
va_end(va);
130
}
145
}
131
146
132
static bool fold_not(OptContext *ctx, TCGOp *op)
147
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
148
+static TCGConstraintSetIndex
149
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
133
{
150
{
134
- return fold_const1(ctx, op);
151
switch (op) {
135
+ if (fold_const1(ctx, op)) {
152
case INDEX_op_goto_ptr:
136
+ return true;
153
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
137
+ }
154
index XXXXXXX..XXXXXXX 100644
138
+
155
--- a/tcg/sparc64/tcg-target.c.inc
139
+ /* Because of fold_to_not, we want to always return true, via finish. */
156
+++ b/tcg/sparc64/tcg-target.c.inc
140
+ finish_folding(ctx, op);
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
141
+ return true;
158
}
142
}
159
}
143
160
144
static bool fold_or(OptContext *ctx, TCGOp *op)
161
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
145
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
162
+static TCGConstraintSetIndex
146
163
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
147
static bool fold_orc(OptContext *ctx, TCGOp *op)
148
{
164
{
149
- return fold_const2(ctx, op);
165
switch (op) {
150
+ if (fold_const2(ctx, op) ||
166
case INDEX_op_goto_ptr:
151
+ fold_ix_to_not(ctx, op, 0)) {
167
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
152
+ return true;
168
index XXXXXXX..XXXXXXX 100644
153
+ }
169
--- a/tcg/tci/tcg-target.c.inc
154
+ return false;
170
+++ b/tcg/tci/tcg-target.c.inc
155
}
171
@@ -XXX,XX +XXX,XX @@
156
172
#endif
157
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
173
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
158
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
174
159
static bool fold_xor(OptContext *ctx, TCGOp *op)
175
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
176
+static TCGConstraintSetIndex
177
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
160
{
178
{
161
if (fold_const2(ctx, op) ||
179
switch (op) {
162
- fold_xx_to_i(ctx, op, 0)) {
180
case INDEX_op_goto_ptr:
163
+ fold_xx_to_i(ctx, op, 0) ||
164
+ fold_xi_to_not(ctx, op, -1)) {
165
return true;
166
}
167
return false;
168
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
169
}
170
}
171
break;
172
- CASE_OP_32_64_VEC(xor):
173
- CASE_OP_32_64(nand):
174
- if (!arg_is_const(op->args[1])
175
- && arg_is_const(op->args[2])
176
- && arg_info(op->args[2])->val == -1) {
177
- i = 1;
178
- goto try_not;
179
- }
180
- break;
181
- CASE_OP_32_64(nor):
182
- if (!arg_is_const(op->args[1])
183
- && arg_is_const(op->args[2])
184
- && arg_info(op->args[2])->val == 0) {
185
- i = 1;
186
- goto try_not;
187
- }
188
- break;
189
- CASE_OP_32_64_VEC(andc):
190
- if (!arg_is_const(op->args[2])
191
- && arg_is_const(op->args[1])
192
- && arg_info(op->args[1])->val == -1) {
193
- i = 2;
194
- goto try_not;
195
- }
196
- break;
197
- CASE_OP_32_64_VEC(orc):
198
- CASE_OP_32_64(eqv):
199
- if (!arg_is_const(op->args[2])
200
- && arg_is_const(op->args[1])
201
- && arg_info(op->args[1])->val == 0) {
202
- i = 2;
203
- goto try_not;
204
- }
205
- break;
206
- try_not:
207
- {
208
- TCGOpcode not_op;
209
- bool have_not;
210
-
211
- switch (ctx.type) {
212
- case TCG_TYPE_I32:
213
- not_op = INDEX_op_not_i32;
214
- have_not = TCG_TARGET_HAS_not_i32;
215
- break;
216
- case TCG_TYPE_I64:
217
- not_op = INDEX_op_not_i64;
218
- have_not = TCG_TARGET_HAS_not_i64;
219
- break;
220
- case TCG_TYPE_V64:
221
- case TCG_TYPE_V128:
222
- case TCG_TYPE_V256:
223
- not_op = INDEX_op_not_vec;
224
- have_not = TCG_TARGET_HAS_not_vec;
225
- break;
226
- default:
227
- g_assert_not_reached();
228
- }
229
- if (!have_not) {
230
- break;
231
- }
232
- op->opc = not_op;
233
- reset_temp(op->args[0]);
234
- op->args[1] = op->args[i];
235
- continue;
236
- }
237
default:
238
break;
239
}
240
--
181
--
241
2.25.1
182
2.43.0
242
183
243
184
diff view generated by jsdifflib
New patch
1
1
Pass TCGOp.type to the output function.
2
For aarch64 and tci, use this instead of testing TCG_OPF_64BIT.
3
For s390x, use this instead of testing INDEX_op_deposit_i64.
4
For i386, use this to initialize rexw.
5
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/tcg.c | 4 ++--
10
tcg/aarch64/tcg-target.c.inc | 6 +-----
11
tcg/arm/tcg-target.c.inc | 2 +-
12
tcg/i386/tcg-target.c.inc | 10 +++++-----
13
tcg/loongarch64/tcg-target.c.inc | 2 +-
14
tcg/mips/tcg-target.c.inc | 2 +-
15
tcg/ppc/tcg-target.c.inc | 2 +-
16
tcg/riscv/tcg-target.c.inc | 2 +-
17
tcg/s390x/tcg-target.c.inc | 7 +++----
18
tcg/sparc64/tcg-target.c.inc | 2 +-
19
tcg/tci/tcg-target.c.inc | 4 ++--
20
11 files changed, 19 insertions(+), 24 deletions(-)
21
22
diff --git a/tcg/tcg.c b/tcg/tcg.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/tcg/tcg.c
25
+++ b/tcg/tcg.c
26
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
27
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
28
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
29
static void tcg_out_goto_tb(TCGContext *s, int which);
30
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
31
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
32
const TCGArg args[TCG_MAX_OP_ARGS],
33
const int const_args[TCG_MAX_OP_ARGS]);
34
#if TCG_TARGET_MAYBE_vec
35
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
36
tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64,
37
TCGOP_VECE(op), new_args, const_args);
38
} else {
39
- tcg_out_op(s, op->opc, new_args, const_args);
40
+ tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args);
41
}
42
break;
43
}
44
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
45
index XXXXXXX..XXXXXXX 100644
46
--- a/tcg/aarch64/tcg-target.c.inc
47
+++ b/tcg/aarch64/tcg-target.c.inc
48
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
49
flush_idcache_range(jmp_rx, jmp_rw, 4);
50
}
51
52
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
53
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
54
const TCGArg args[TCG_MAX_OP_ARGS],
55
const int const_args[TCG_MAX_OP_ARGS])
56
{
57
- /* 99% of the time, we can signal the use of extension registers
58
- by looking to see if the opcode handles 64-bit data. */
59
- TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
60
-
61
/* Hoist the loads of the most common arguments. */
62
TCGArg a0 = args[0];
63
TCGArg a1 = args[1];
64
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
65
index XXXXXXX..XXXXXXX 100644
66
--- a/tcg/arm/tcg-target.c.inc
67
+++ b/tcg/arm/tcg-target.c.inc
68
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
69
flush_idcache_range(jmp_rx, jmp_rw, 4);
70
}
71
72
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
73
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
74
const TCGArg args[TCG_MAX_OP_ARGS],
75
const int const_args[TCG_MAX_OP_ARGS])
76
{
77
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
78
index XXXXXXX..XXXXXXX 100644
79
--- a/tcg/i386/tcg-target.c.inc
80
+++ b/tcg/i386/tcg-target.c.inc
81
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
82
/* no need to flush icache explicitly */
83
}
84
85
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
86
- const TCGArg args[TCG_MAX_OP_ARGS],
87
- const int const_args[TCG_MAX_OP_ARGS])
88
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
89
+ const TCGArg args[TCG_MAX_OP_ARGS],
90
+ const int const_args[TCG_MAX_OP_ARGS])
91
{
92
TCGArg a0, a1, a2;
93
- int c, const_a2, vexop, rexw = 0;
94
+ int c, const_a2, vexop, rexw;
95
96
#if TCG_TARGET_REG_BITS == 64
97
# define OP_32_64(x) \
98
case glue(glue(INDEX_op_, x), _i64): \
99
- rexw = P_REXW; /* FALLTHRU */ \
100
case glue(glue(INDEX_op_, x), _i32)
101
#else
102
# define OP_32_64(x) \
103
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
104
a1 = args[1];
105
a2 = args[2];
106
const_a2 = const_args[2];
107
+ rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
108
109
switch (opc) {
110
case INDEX_op_goto_ptr:
111
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
112
index XXXXXXX..XXXXXXX 100644
113
--- a/tcg/loongarch64/tcg-target.c.inc
114
+++ b/tcg/loongarch64/tcg-target.c.inc
115
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
116
flush_idcache_range(jmp_rx, jmp_rw, 4);
117
}
118
119
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
120
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
121
const TCGArg args[TCG_MAX_OP_ARGS],
122
const int const_args[TCG_MAX_OP_ARGS])
123
{
124
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
125
index XXXXXXX..XXXXXXX 100644
126
--- a/tcg/mips/tcg-target.c.inc
127
+++ b/tcg/mips/tcg-target.c.inc
128
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
129
/* Always indirect, nothing to do */
130
}
131
132
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
133
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
134
const TCGArg args[TCG_MAX_OP_ARGS],
135
const int const_args[TCG_MAX_OP_ARGS])
136
{
137
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
138
index XXXXXXX..XXXXXXX 100644
139
--- a/tcg/ppc/tcg-target.c.inc
140
+++ b/tcg/ppc/tcg-target.c.inc
141
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
142
flush_idcache_range(jmp_rx, jmp_rw, 4);
143
}
144
145
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
146
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
147
const TCGArg args[TCG_MAX_OP_ARGS],
148
const int const_args[TCG_MAX_OP_ARGS])
149
{
150
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
151
index XXXXXXX..XXXXXXX 100644
152
--- a/tcg/riscv/tcg-target.c.inc
153
+++ b/tcg/riscv/tcg-target.c.inc
154
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
155
flush_idcache_range(jmp_rx, jmp_rw, 4);
156
}
157
158
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
159
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
160
const TCGArg args[TCG_MAX_OP_ARGS],
161
const int const_args[TCG_MAX_OP_ARGS])
162
{
163
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
164
index XXXXXXX..XXXXXXX 100644
165
--- a/tcg/s390x/tcg-target.c.inc
166
+++ b/tcg/s390x/tcg-target.c.inc
167
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
168
case glue(glue(INDEX_op_,x),_i32): \
169
case glue(glue(INDEX_op_,x),_i64)
170
171
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
172
- const TCGArg args[TCG_MAX_OP_ARGS],
173
- const int const_args[TCG_MAX_OP_ARGS])
174
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
175
+ const TCGArg args[TCG_MAX_OP_ARGS],
176
+ const int const_args[TCG_MAX_OP_ARGS])
177
{
178
S390Opcode op, op2;
179
TCGArg a0, a1, a2;
180
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
181
/* Since we can't support "0Z" as a constraint, we allow a1 in
182
any register. Fix things up as if a matching constraint. */
183
if (a0 != a1) {
184
- TCGType type = (opc == INDEX_op_deposit_i64);
185
if (a0 == a2) {
186
tcg_out_mov(s, type, TCG_TMP0, a2);
187
a2 = TCG_TMP0;
188
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
189
index XXXXXXX..XXXXXXX 100644
190
--- a/tcg/sparc64/tcg-target.c.inc
191
+++ b/tcg/sparc64/tcg-target.c.inc
192
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
193
{
194
}
195
196
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
197
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
198
const TCGArg args[TCG_MAX_OP_ARGS],
199
const int const_args[TCG_MAX_OP_ARGS])
200
{
201
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
202
index XXXXXXX..XXXXXXX 100644
203
--- a/tcg/tci/tcg-target.c.inc
204
+++ b/tcg/tci/tcg-target.c.inc
205
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
206
/* Always indirect, nothing to do */
207
}
208
209
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
210
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
211
const TCGArg args[TCG_MAX_OP_ARGS],
212
const int const_args[TCG_MAX_OP_ARGS])
213
{
214
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
215
CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */
216
{
217
TCGArg pos = args[2], len = args[3];
218
- TCGArg max = tcg_op_defs[opc].flags & TCG_OPF_64BIT ? 64 : 32;
219
+ TCGArg max = type == TCG_TYPE_I32 ? 32 : 64;
220
221
tcg_debug_assert(pos < max);
222
tcg_debug_assert(pos + len <= max);
223
--
224
2.43.0
225
226
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
This flag is no longer used.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
tcg/optimize.c | 23 ++++++++++++++---------
6
include/tcg/tcg-opc.h | 22 +++++++++++-----------
6
1 file changed, 14 insertions(+), 9 deletions(-)
7
include/tcg/tcg.h | 2 --
8
2 files changed, 11 insertions(+), 13 deletions(-)
7
9
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
9
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
12
--- a/include/tcg/tcg-opc.h
11
+++ b/tcg/optimize.c
13
+++ b/include/tcg/tcg-opc.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
13
return fold_const2(ctx, op);
15
14
}
16
#define IMPL(X) (__builtin_constant_p(X) && (X) <= 0 ? TCG_OPF_NOT_PRESENT : 0)
15
17
#if TCG_TARGET_REG_BITS == 32
16
+static bool fold_setcond(OptContext *ctx, TCGOp *op)
18
-# define IMPL64 TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT
17
+{
19
+# define IMPL64 TCG_OPF_NOT_PRESENT
18
+ TCGCond cond = op->args[3];
20
#else
19
+ int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
21
-# define IMPL64 TCG_OPF_64BIT
20
+
22
+# define IMPL64 0
21
+ if (i >= 0) {
23
#endif
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
23
+ }
25
DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
24
+ return false;
26
@@ -XXX,XX +XXX,XX @@ DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32))
25
+}
27
DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
26
+
28
DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
27
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
29
28
{
30
-DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
29
TCGCond cond = op->args[5];
31
+DEF(mov_i64, 1, 1, 0, TCG_OPF_NOT_PRESENT)
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
32
DEF(setcond_i64, 1, 2, 1, IMPL64)
31
}
33
DEF(negsetcond_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_negsetcond_i64))
32
break;
34
DEF(movcond_i64, 1, 4, 1, IMPL64)
33
35
@@ -XXX,XX +XXX,XX @@ DEF(qemu_ld_a32_i32, 1, 1, 1,
34
- CASE_OP_32_64(setcond):
36
DEF(qemu_st_a32_i32, 0, 1 + 1, 1,
35
- i = do_constant_folding_cond(opc, op->args[1],
37
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
36
- op->args[2], op->args[3]);
38
DEF(qemu_ld_a32_i64, DATA64_ARGS, 1, 1,
37
- if (i >= 0) {
39
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
38
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
40
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
39
- continue;
41
DEF(qemu_st_a32_i64, 0, DATA64_ARGS + 1, 1,
40
- }
42
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
41
- break;
43
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
42
-
44
43
CASE_OP_32_64(movcond):
45
DEF(qemu_ld_a64_i32, 1, DATA64_ARGS, 1,
44
i = do_constant_folding_cond(opc, op->args[1],
46
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
45
op->args[2], op->args[5]);
47
DEF(qemu_st_a64_i32, 0, 1 + DATA64_ARGS, 1,
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
47
CASE_OP_32_64(shr):
49
DEF(qemu_ld_a64_i64, DATA64_ARGS, DATA64_ARGS, 1,
48
done = fold_shift(&ctx, op);
50
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
49
break;
51
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
50
+ CASE_OP_32_64(setcond):
52
DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
51
+ done = fold_setcond(&ctx, op);
53
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
52
+ break;
54
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
53
case INDEX_op_setcond2_i32:
55
54
done = fold_setcond2(&ctx, op);
56
/* Only used by i386 to cope with stupid register constraints. */
55
break;
57
DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
58
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
59
60
/* Only for 64-bit hosts at the moment. */
61
DEF(qemu_ld_a32_i128, 2, 1, 1,
62
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
63
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
64
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
65
DEF(qemu_ld_a64_i128, 2, 1, 1,
66
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
67
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
68
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
69
DEF(qemu_st_a32_i128, 0, 3, 1,
70
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
71
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
72
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
73
DEF(qemu_st_a64_i128, 0, 3, 1,
74
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
75
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
76
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
77
78
/* Host vector support. */
79
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
80
index XXXXXXX..XXXXXXX 100644
81
--- a/include/tcg/tcg.h
82
+++ b/include/tcg/tcg.h
83
@@ -XXX,XX +XXX,XX @@ enum {
84
/* Instruction has side effects: it cannot be removed if its outputs
85
are not used, and might trigger exceptions. */
86
TCG_OPF_SIDE_EFFECTS = 0x08,
87
- /* Instruction operands are 64-bits (otherwise 32-bits). */
88
- TCG_OPF_64BIT = 0x10,
89
/* Instruction is optional and not implemented by the host, or insn
90
is generic and should not be implemented by the host. */
91
TCG_OPF_NOT_PRESENT = 0x20,
56
--
92
--
57
2.25.1
93
2.43.0
58
94
59
95
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Now that we use a functional interface to query whether the opcode
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
is supported, we can drop the TCG_OPF_NOT_PRESENT bit mapping from
3
TCG_TARGET_HAS_foo in tcg-opc.h
4
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
7
---
5
tcg/optimize.c | 33 +++++++++++++++++++--------------
8
include/tcg/tcg-opc.h | 306 +++++++++++++++++++-----------------------
6
1 file changed, 19 insertions(+), 14 deletions(-)
9
1 file changed, 141 insertions(+), 165 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/include/tcg/tcg-opc.h
11
+++ b/tcg/optimize.c
14
+++ b/include/tcg/tcg-opc.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
13
return fold_const2(ctx, op);
16
14
}
17
DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
15
18
16
+static bool fold_brcond(OptContext *ctx, TCGOp *op)
19
-#define IMPL(X) (__builtin_constant_p(X) && (X) <= 0 ? TCG_OPF_NOT_PRESENT : 0)
17
+{
20
-#if TCG_TARGET_REG_BITS == 32
18
+ TCGCond cond = op->args[2];
21
-# define IMPL64 TCG_OPF_NOT_PRESENT
19
+ int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
22
-#else
20
+
23
-# define IMPL64 0
21
+ if (i == 0) {
24
-#endif
22
+ tcg_op_remove(ctx->tcg, op);
23
+ return true;
24
+ }
25
+ if (i > 0) {
26
+ op->opc = INDEX_op_br;
27
+ op->args[0] = op->args[3];
28
+ }
29
+ return false;
30
+}
31
+
32
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
33
{
34
TCGCond cond = op->args[4];
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
36
}
37
break;
38
39
- CASE_OP_32_64(brcond):
40
- i = do_constant_folding_cond(opc, op->args[0],
41
- op->args[1], op->args[2]);
42
- if (i == 0) {
43
- tcg_op_remove(s, op);
44
- continue;
45
- } else if (i > 0) {
46
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
47
- op->opc = opc = INDEX_op_br;
48
- op->args[0] = op->args[3];
49
- break;
50
- }
51
- break;
52
-
25
-
53
CASE_OP_32_64(movcond):
26
DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
54
i = do_constant_folding_cond(opc, op->args[1],
27
55
op->args[2], op->args[5]);
28
DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
56
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
29
DEF(setcond_i32, 1, 2, 1, 0)
57
CASE_OP_32_64_VEC(andc):
30
-DEF(negsetcond_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_negsetcond_i32))
58
done = fold_andc(&ctx, op);
31
+DEF(negsetcond_i32, 1, 2, 1, 0)
59
break;
32
DEF(movcond_i32, 1, 4, 1, 0)
60
+ CASE_OP_32_64(brcond):
33
/* load/store */
61
+ done = fold_brcond(&ctx, op);
34
DEF(ld8u_i32, 1, 1, 1, 0)
62
+ break;
35
@@ -XXX,XX +XXX,XX @@ DEF(st_i32, 0, 2, 1, 0)
63
case INDEX_op_brcond2_i32:
36
DEF(add_i32, 1, 2, 0, 0)
64
done = fold_brcond2(&ctx, op);
37
DEF(sub_i32, 1, 2, 0, 0)
65
break;
38
DEF(mul_i32, 1, 2, 0, 0)
39
-DEF(div_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32))
40
-DEF(divu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32))
41
-DEF(rem_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32))
42
-DEF(remu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32))
43
-DEF(div2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32))
44
-DEF(divu2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32))
45
+DEF(div_i32, 1, 2, 0, 0)
46
+DEF(divu_i32, 1, 2, 0, 0)
47
+DEF(rem_i32, 1, 2, 0, 0)
48
+DEF(remu_i32, 1, 2, 0, 0)
49
+DEF(div2_i32, 2, 3, 0, 0)
50
+DEF(divu2_i32, 2, 3, 0, 0)
51
DEF(and_i32, 1, 2, 0, 0)
52
DEF(or_i32, 1, 2, 0, 0)
53
DEF(xor_i32, 1, 2, 0, 0)
54
@@ -XXX,XX +XXX,XX @@ DEF(xor_i32, 1, 2, 0, 0)
55
DEF(shl_i32, 1, 2, 0, 0)
56
DEF(shr_i32, 1, 2, 0, 0)
57
DEF(sar_i32, 1, 2, 0, 0)
58
-DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
59
-DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
60
-DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
61
-DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32))
62
-DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32))
63
-DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32))
64
+DEF(rotl_i32, 1, 2, 0, 0)
65
+DEF(rotr_i32, 1, 2, 0, 0)
66
+DEF(deposit_i32, 1, 2, 2, 0)
67
+DEF(extract_i32, 1, 1, 2, 0)
68
+DEF(sextract_i32, 1, 1, 2, 0)
69
+DEF(extract2_i32, 1, 2, 1, 0)
70
71
DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
72
73
-DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
74
-DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
75
-DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
76
-DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
77
-DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
78
-DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
79
-DEF(brcond2_i32, 0, 4, 2,
80
- TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL(TCG_TARGET_REG_BITS == 32))
81
-DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
82
+DEF(add2_i32, 2, 4, 0, 0)
83
+DEF(sub2_i32, 2, 4, 0, 0)
84
+DEF(mulu2_i32, 2, 2, 0, 0)
85
+DEF(muls2_i32, 2, 2, 0, 0)
86
+DEF(muluh_i32, 1, 2, 0, 0)
87
+DEF(mulsh_i32, 1, 2, 0, 0)
88
+DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
89
+DEF(setcond2_i32, 1, 4, 1, 0)
90
91
-DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
92
-DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32))
93
-DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32))
94
-DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32))
95
-DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32))
96
-DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32))
97
-DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32))
98
+DEF(ext8s_i32, 1, 1, 0, 0)
99
+DEF(ext16s_i32, 1, 1, 0, 0)
100
+DEF(ext8u_i32, 1, 1, 0, 0)
101
+DEF(ext16u_i32, 1, 1, 0, 0)
102
+DEF(bswap16_i32, 1, 1, 1, 0)
103
+DEF(bswap32_i32, 1, 1, 1, 0)
104
+DEF(not_i32, 1, 1, 0, 0)
105
DEF(neg_i32, 1, 1, 0, 0)
106
-DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32))
107
-DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32))
108
-DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32))
109
-DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32))
110
-DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
111
-DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32))
112
-DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
113
-DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
114
+DEF(andc_i32, 1, 2, 0, 0)
115
+DEF(orc_i32, 1, 2, 0, 0)
116
+DEF(eqv_i32, 1, 2, 0, 0)
117
+DEF(nand_i32, 1, 2, 0, 0)
118
+DEF(nor_i32, 1, 2, 0, 0)
119
+DEF(clz_i32, 1, 2, 0, 0)
120
+DEF(ctz_i32, 1, 2, 0, 0)
121
+DEF(ctpop_i32, 1, 1, 0, 0)
122
123
DEF(mov_i64, 1, 1, 0, TCG_OPF_NOT_PRESENT)
124
-DEF(setcond_i64, 1, 2, 1, IMPL64)
125
-DEF(negsetcond_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_negsetcond_i64))
126
-DEF(movcond_i64, 1, 4, 1, IMPL64)
127
+DEF(setcond_i64, 1, 2, 1, 0)
128
+DEF(negsetcond_i64, 1, 2, 1, 0)
129
+DEF(movcond_i64, 1, 4, 1, 0)
130
/* load/store */
131
-DEF(ld8u_i64, 1, 1, 1, IMPL64)
132
-DEF(ld8s_i64, 1, 1, 1, IMPL64)
133
-DEF(ld16u_i64, 1, 1, 1, IMPL64)
134
-DEF(ld16s_i64, 1, 1, 1, IMPL64)
135
-DEF(ld32u_i64, 1, 1, 1, IMPL64)
136
-DEF(ld32s_i64, 1, 1, 1, IMPL64)
137
-DEF(ld_i64, 1, 1, 1, IMPL64)
138
-DEF(st8_i64, 0, 2, 1, IMPL64)
139
-DEF(st16_i64, 0, 2, 1, IMPL64)
140
-DEF(st32_i64, 0, 2, 1, IMPL64)
141
-DEF(st_i64, 0, 2, 1, IMPL64)
142
+DEF(ld8u_i64, 1, 1, 1, 0)
143
+DEF(ld8s_i64, 1, 1, 1, 0)
144
+DEF(ld16u_i64, 1, 1, 1, 0)
145
+DEF(ld16s_i64, 1, 1, 1, 0)
146
+DEF(ld32u_i64, 1, 1, 1, 0)
147
+DEF(ld32s_i64, 1, 1, 1, 0)
148
+DEF(ld_i64, 1, 1, 1, 0)
149
+DEF(st8_i64, 0, 2, 1, 0)
150
+DEF(st16_i64, 0, 2, 1, 0)
151
+DEF(st32_i64, 0, 2, 1, 0)
152
+DEF(st_i64, 0, 2, 1, 0)
153
/* arith */
154
-DEF(add_i64, 1, 2, 0, IMPL64)
155
-DEF(sub_i64, 1, 2, 0, IMPL64)
156
-DEF(mul_i64, 1, 2, 0, IMPL64)
157
-DEF(div_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64))
158
-DEF(divu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64))
159
-DEF(rem_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64))
160
-DEF(remu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64))
161
-DEF(div2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64))
162
-DEF(divu2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64))
163
-DEF(and_i64, 1, 2, 0, IMPL64)
164
-DEF(or_i64, 1, 2, 0, IMPL64)
165
-DEF(xor_i64, 1, 2, 0, IMPL64)
166
+DEF(add_i64, 1, 2, 0, 0)
167
+DEF(sub_i64, 1, 2, 0, 0)
168
+DEF(mul_i64, 1, 2, 0, 0)
169
+DEF(div_i64, 1, 2, 0, 0)
170
+DEF(divu_i64, 1, 2, 0, 0)
171
+DEF(rem_i64, 1, 2, 0, 0)
172
+DEF(remu_i64, 1, 2, 0, 0)
173
+DEF(div2_i64, 2, 3, 0, 0)
174
+DEF(divu2_i64, 2, 3, 0, 0)
175
+DEF(and_i64, 1, 2, 0, 0)
176
+DEF(or_i64, 1, 2, 0, 0)
177
+DEF(xor_i64, 1, 2, 0, 0)
178
/* shifts/rotates */
179
-DEF(shl_i64, 1, 2, 0, IMPL64)
180
-DEF(shr_i64, 1, 2, 0, IMPL64)
181
-DEF(sar_i64, 1, 2, 0, IMPL64)
182
-DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
183
-DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
184
-DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
185
-DEF(extract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_extract_i64))
186
-DEF(sextract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_sextract_i64))
187
-DEF(extract2_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_extract2_i64))
188
+DEF(shl_i64, 1, 2, 0, 0)
189
+DEF(shr_i64, 1, 2, 0, 0)
190
+DEF(sar_i64, 1, 2, 0, 0)
191
+DEF(rotl_i64, 1, 2, 0, 0)
192
+DEF(rotr_i64, 1, 2, 0, 0)
193
+DEF(deposit_i64, 1, 2, 2, 0)
194
+DEF(extract_i64, 1, 1, 2, 0)
195
+DEF(sextract_i64, 1, 1, 2, 0)
196
+DEF(extract2_i64, 1, 2, 1, 0)
197
198
/* size changing ops */
199
-DEF(ext_i32_i64, 1, 1, 0, IMPL64)
200
-DEF(extu_i32_i64, 1, 1, 0, IMPL64)
201
-DEF(extrl_i64_i32, 1, 1, 0,
202
- IMPL(TCG_TARGET_HAS_extr_i64_i32)
203
- | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
204
-DEF(extrh_i64_i32, 1, 1, 0,
205
- IMPL(TCG_TARGET_HAS_extr_i64_i32)
206
- | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
207
+DEF(ext_i32_i64, 1, 1, 0, 0)
208
+DEF(extu_i32_i64, 1, 1, 0, 0)
209
+DEF(extrl_i64_i32, 1, 1, 0, 0)
210
+DEF(extrh_i64_i32, 1, 1, 0, 0)
211
212
-DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL64)
213
-DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64))
214
-DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64))
215
-DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
216
-DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64))
217
-DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64))
218
-DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64))
219
-DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))
220
-DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))
221
-DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))
222
-DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64))
223
-DEF(neg_i64, 1, 1, 0, IMPL64)
224
-DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64))
225
-DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64))
226
-DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64))
227
-DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
228
-DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
229
-DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64))
230
-DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64))
231
-DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64))
232
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
233
+DEF(ext8s_i64, 1, 1, 0, 0)
234
+DEF(ext16s_i64, 1, 1, 0, 0)
235
+DEF(ext32s_i64, 1, 1, 0, 0)
236
+DEF(ext8u_i64, 1, 1, 0, 0)
237
+DEF(ext16u_i64, 1, 1, 0, 0)
238
+DEF(ext32u_i64, 1, 1, 0, 0)
239
+DEF(bswap16_i64, 1, 1, 1, 0)
240
+DEF(bswap32_i64, 1, 1, 1, 0)
241
+DEF(bswap64_i64, 1, 1, 1, 0)
242
+DEF(not_i64, 1, 1, 0, 0)
243
+DEF(neg_i64, 1, 1, 0, 0)
244
+DEF(andc_i64, 1, 2, 0, 0)
245
+DEF(orc_i64, 1, 2, 0, 0)
246
+DEF(eqv_i64, 1, 2, 0, 0)
247
+DEF(nand_i64, 1, 2, 0, 0)
248
+DEF(nor_i64, 1, 2, 0, 0)
249
+DEF(clz_i64, 1, 2, 0, 0)
250
+DEF(ctz_i64, 1, 2, 0, 0)
251
+DEF(ctpop_i64, 1, 1, 0, 0)
252
253
-DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
254
-DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
255
-DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
256
-DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
257
-DEF(muluh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muluh_i64))
258
-DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
259
+DEF(add2_i64, 2, 4, 0, 0)
260
+DEF(sub2_i64, 2, 4, 0, 0)
261
+DEF(mulu2_i64, 2, 2, 0, 0)
262
+DEF(muls2_i64, 2, 2, 0, 0)
263
+DEF(muluh_i64, 1, 2, 0, 0)
264
+DEF(mulsh_i64, 1, 2, 0, 0)
265
266
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
267
268
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
269
270
/* Only used by i386 to cope with stupid register constraints. */
271
DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
272
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
273
- IMPL(TCG_TARGET_HAS_qemu_st8_i32))
274
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
275
DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
276
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
277
- IMPL(TCG_TARGET_HAS_qemu_st8_i32))
278
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
279
280
/* Only for 64-bit hosts at the moment. */
281
-DEF(qemu_ld_a32_i128, 2, 1, 1,
282
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
283
- IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
284
-DEF(qemu_ld_a64_i128, 2, 1, 1,
285
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
286
- IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
287
-DEF(qemu_st_a32_i128, 0, 3, 1,
288
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
289
- IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
290
-DEF(qemu_st_a64_i128, 0, 3, 1,
291
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
292
- IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
293
+DEF(qemu_ld_a32_i128, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
294
+DEF(qemu_ld_a64_i128, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
295
+DEF(qemu_st_a32_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
296
+DEF(qemu_st_a64_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
297
298
/* Host vector support. */
299
300
-#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
301
+#define IMPLVEC TCG_OPF_VECTOR
302
303
DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
304
305
DEF(dup_vec, 1, 1, 0, IMPLVEC)
306
-DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
307
+DEF(dup2_vec, 1, 2, 0, IMPLVEC)
308
309
DEF(ld_vec, 1, 1, 1, IMPLVEC)
310
DEF(st_vec, 0, 2, 1, IMPLVEC)
311
@@ -XXX,XX +XXX,XX @@ DEF(dupm_vec, 1, 1, 1, IMPLVEC)
312
313
DEF(add_vec, 1, 2, 0, IMPLVEC)
314
DEF(sub_vec, 1, 2, 0, IMPLVEC)
315
-DEF(mul_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_mul_vec))
316
-DEF(neg_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
317
-DEF(abs_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_abs_vec))
318
-DEF(ssadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
319
-DEF(usadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
320
-DEF(sssub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
321
-DEF(ussub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
322
-DEF(smin_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
323
-DEF(umin_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
324
-DEF(smax_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
325
-DEF(umax_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
326
+DEF(mul_vec, 1, 2, 0, IMPLVEC)
327
+DEF(neg_vec, 1, 1, 0, IMPLVEC)
328
+DEF(abs_vec, 1, 1, 0, IMPLVEC)
329
+DEF(ssadd_vec, 1, 2, 0, IMPLVEC)
330
+DEF(usadd_vec, 1, 2, 0, IMPLVEC)
331
+DEF(sssub_vec, 1, 2, 0, IMPLVEC)
332
+DEF(ussub_vec, 1, 2, 0, IMPLVEC)
333
+DEF(smin_vec, 1, 2, 0, IMPLVEC)
334
+DEF(umin_vec, 1, 2, 0, IMPLVEC)
335
+DEF(smax_vec, 1, 2, 0, IMPLVEC)
336
+DEF(umax_vec, 1, 2, 0, IMPLVEC)
337
338
DEF(and_vec, 1, 2, 0, IMPLVEC)
339
DEF(or_vec, 1, 2, 0, IMPLVEC)
340
DEF(xor_vec, 1, 2, 0, IMPLVEC)
341
-DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
342
-DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
343
-DEF(nand_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nand_vec))
344
-DEF(nor_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nor_vec))
345
-DEF(eqv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_eqv_vec))
346
-DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
347
+DEF(andc_vec, 1, 2, 0, IMPLVEC)
348
+DEF(orc_vec, 1, 2, 0, IMPLVEC)
349
+DEF(nand_vec, 1, 2, 0, IMPLVEC)
350
+DEF(nor_vec, 1, 2, 0, IMPLVEC)
351
+DEF(eqv_vec, 1, 2, 0, IMPLVEC)
352
+DEF(not_vec, 1, 1, 0, IMPLVEC)
353
354
-DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
355
-DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
356
-DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
357
-DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
358
+DEF(shli_vec, 1, 1, 1, IMPLVEC)
359
+DEF(shri_vec, 1, 1, 1, IMPLVEC)
360
+DEF(sari_vec, 1, 1, 1, IMPLVEC)
361
+DEF(rotli_vec, 1, 1, 1, IMPLVEC)
362
363
-DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
364
-DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
365
-DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
366
-DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec))
367
+DEF(shls_vec, 1, 2, 0, IMPLVEC)
368
+DEF(shrs_vec, 1, 2, 0, IMPLVEC)
369
+DEF(sars_vec, 1, 2, 0, IMPLVEC)
370
+DEF(rotls_vec, 1, 2, 0, IMPLVEC)
371
372
-DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
373
-DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
374
-DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
375
-DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
376
-DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
377
+DEF(shlv_vec, 1, 2, 0, IMPLVEC)
378
+DEF(shrv_vec, 1, 2, 0, IMPLVEC)
379
+DEF(sarv_vec, 1, 2, 0, IMPLVEC)
380
+DEF(rotlv_vec, 1, 2, 0, IMPLVEC)
381
+DEF(rotrv_vec, 1, 2, 0, IMPLVEC)
382
383
DEF(cmp_vec, 1, 2, 1, IMPLVEC)
384
385
-DEF(bitsel_vec, 1, 3, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_bitsel_vec))
386
-DEF(cmpsel_vec, 1, 4, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_cmpsel_vec))
387
+DEF(bitsel_vec, 1, 3, 0, IMPLVEC)
388
+DEF(cmpsel_vec, 1, 4, 1, IMPLVEC)
389
390
DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
391
392
#include "tcg-target-opc.h.inc"
393
394
#undef DATA64_ARGS
395
-#undef IMPL
396
-#undef IMPL64
397
#undef IMPLVEC
398
#undef DEF
66
--
399
--
67
2.25.1
400
2.43.0
68
401
69
402
diff view generated by jsdifflib
New patch
1
1
This is now a direct replacement.
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
include/tcg/tcg-opc.h | 89 +++++++++++++++-----------------
7
tcg/aarch64/tcg-target-opc.h.inc | 4 +-
8
tcg/arm/tcg-target-opc.h.inc | 6 +--
9
tcg/i386/tcg-target-opc.h.inc | 22 ++++----
10
tcg/ppc/tcg-target-opc.h.inc | 12 ++---
11
tcg/s390x/tcg-target-opc.h.inc | 6 +--
12
6 files changed, 68 insertions(+), 71 deletions(-)
13
14
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/tcg/tcg-opc.h
17
+++ b/include/tcg/tcg-opc.h
18
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st_a64_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
19
20
/* Host vector support. */
21
22
-#define IMPLVEC TCG_OPF_VECTOR
23
-
24
DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
25
26
-DEF(dup_vec, 1, 1, 0, IMPLVEC)
27
-DEF(dup2_vec, 1, 2, 0, IMPLVEC)
28
+DEF(dup_vec, 1, 1, 0, TCG_OPF_VECTOR)
29
+DEF(dup2_vec, 1, 2, 0, TCG_OPF_VECTOR)
30
31
-DEF(ld_vec, 1, 1, 1, IMPLVEC)
32
-DEF(st_vec, 0, 2, 1, IMPLVEC)
33
-DEF(dupm_vec, 1, 1, 1, IMPLVEC)
34
+DEF(ld_vec, 1, 1, 1, TCG_OPF_VECTOR)
35
+DEF(st_vec, 0, 2, 1, TCG_OPF_VECTOR)
36
+DEF(dupm_vec, 1, 1, 1, TCG_OPF_VECTOR)
37
38
-DEF(add_vec, 1, 2, 0, IMPLVEC)
39
-DEF(sub_vec, 1, 2, 0, IMPLVEC)
40
-DEF(mul_vec, 1, 2, 0, IMPLVEC)
41
-DEF(neg_vec, 1, 1, 0, IMPLVEC)
42
-DEF(abs_vec, 1, 1, 0, IMPLVEC)
43
-DEF(ssadd_vec, 1, 2, 0, IMPLVEC)
44
-DEF(usadd_vec, 1, 2, 0, IMPLVEC)
45
-DEF(sssub_vec, 1, 2, 0, IMPLVEC)
46
-DEF(ussub_vec, 1, 2, 0, IMPLVEC)
47
-DEF(smin_vec, 1, 2, 0, IMPLVEC)
48
-DEF(umin_vec, 1, 2, 0, IMPLVEC)
49
-DEF(smax_vec, 1, 2, 0, IMPLVEC)
50
-DEF(umax_vec, 1, 2, 0, IMPLVEC)
51
+DEF(add_vec, 1, 2, 0, TCG_OPF_VECTOR)
52
+DEF(sub_vec, 1, 2, 0, TCG_OPF_VECTOR)
53
+DEF(mul_vec, 1, 2, 0, TCG_OPF_VECTOR)
54
+DEF(neg_vec, 1, 1, 0, TCG_OPF_VECTOR)
55
+DEF(abs_vec, 1, 1, 0, TCG_OPF_VECTOR)
56
+DEF(ssadd_vec, 1, 2, 0, TCG_OPF_VECTOR)
57
+DEF(usadd_vec, 1, 2, 0, TCG_OPF_VECTOR)
58
+DEF(sssub_vec, 1, 2, 0, TCG_OPF_VECTOR)
59
+DEF(ussub_vec, 1, 2, 0, TCG_OPF_VECTOR)
60
+DEF(smin_vec, 1, 2, 0, TCG_OPF_VECTOR)
61
+DEF(umin_vec, 1, 2, 0, TCG_OPF_VECTOR)
62
+DEF(smax_vec, 1, 2, 0, TCG_OPF_VECTOR)
63
+DEF(umax_vec, 1, 2, 0, TCG_OPF_VECTOR)
64
65
-DEF(and_vec, 1, 2, 0, IMPLVEC)
66
-DEF(or_vec, 1, 2, 0, IMPLVEC)
67
-DEF(xor_vec, 1, 2, 0, IMPLVEC)
68
-DEF(andc_vec, 1, 2, 0, IMPLVEC)
69
-DEF(orc_vec, 1, 2, 0, IMPLVEC)
70
-DEF(nand_vec, 1, 2, 0, IMPLVEC)
71
-DEF(nor_vec, 1, 2, 0, IMPLVEC)
72
-DEF(eqv_vec, 1, 2, 0, IMPLVEC)
73
-DEF(not_vec, 1, 1, 0, IMPLVEC)
74
+DEF(and_vec, 1, 2, 0, TCG_OPF_VECTOR)
75
+DEF(or_vec, 1, 2, 0, TCG_OPF_VECTOR)
76
+DEF(xor_vec, 1, 2, 0, TCG_OPF_VECTOR)
77
+DEF(andc_vec, 1, 2, 0, TCG_OPF_VECTOR)
78
+DEF(orc_vec, 1, 2, 0, TCG_OPF_VECTOR)
79
+DEF(nand_vec, 1, 2, 0, TCG_OPF_VECTOR)
80
+DEF(nor_vec, 1, 2, 0, TCG_OPF_VECTOR)
81
+DEF(eqv_vec, 1, 2, 0, TCG_OPF_VECTOR)
82
+DEF(not_vec, 1, 1, 0, TCG_OPF_VECTOR)
83
84
-DEF(shli_vec, 1, 1, 1, IMPLVEC)
85
-DEF(shri_vec, 1, 1, 1, IMPLVEC)
86
-DEF(sari_vec, 1, 1, 1, IMPLVEC)
87
-DEF(rotli_vec, 1, 1, 1, IMPLVEC)
88
+DEF(shli_vec, 1, 1, 1, TCG_OPF_VECTOR)
89
+DEF(shri_vec, 1, 1, 1, TCG_OPF_VECTOR)
90
+DEF(sari_vec, 1, 1, 1, TCG_OPF_VECTOR)
91
+DEF(rotli_vec, 1, 1, 1, TCG_OPF_VECTOR)
92
93
-DEF(shls_vec, 1, 2, 0, IMPLVEC)
94
-DEF(shrs_vec, 1, 2, 0, IMPLVEC)
95
-DEF(sars_vec, 1, 2, 0, IMPLVEC)
96
-DEF(rotls_vec, 1, 2, 0, IMPLVEC)
97
+DEF(shls_vec, 1, 2, 0, TCG_OPF_VECTOR)
98
+DEF(shrs_vec, 1, 2, 0, TCG_OPF_VECTOR)
99
+DEF(sars_vec, 1, 2, 0, TCG_OPF_VECTOR)
100
+DEF(rotls_vec, 1, 2, 0, TCG_OPF_VECTOR)
101
102
-DEF(shlv_vec, 1, 2, 0, IMPLVEC)
103
-DEF(shrv_vec, 1, 2, 0, IMPLVEC)
104
-DEF(sarv_vec, 1, 2, 0, IMPLVEC)
105
-DEF(rotlv_vec, 1, 2, 0, IMPLVEC)
106
-DEF(rotrv_vec, 1, 2, 0, IMPLVEC)
107
+DEF(shlv_vec, 1, 2, 0, TCG_OPF_VECTOR)
108
+DEF(shrv_vec, 1, 2, 0, TCG_OPF_VECTOR)
109
+DEF(sarv_vec, 1, 2, 0, TCG_OPF_VECTOR)
110
+DEF(rotlv_vec, 1, 2, 0, TCG_OPF_VECTOR)
111
+DEF(rotrv_vec, 1, 2, 0, TCG_OPF_VECTOR)
112
113
-DEF(cmp_vec, 1, 2, 1, IMPLVEC)
114
+DEF(cmp_vec, 1, 2, 1, TCG_OPF_VECTOR)
115
116
-DEF(bitsel_vec, 1, 3, 0, IMPLVEC)
117
-DEF(cmpsel_vec, 1, 4, 1, IMPLVEC)
118
+DEF(bitsel_vec, 1, 3, 0, TCG_OPF_VECTOR)
119
+DEF(cmpsel_vec, 1, 4, 1, TCG_OPF_VECTOR)
120
121
DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
122
123
#include "tcg-target-opc.h.inc"
124
125
#undef DATA64_ARGS
126
-#undef IMPLVEC
127
#undef DEF
128
diff --git a/tcg/aarch64/tcg-target-opc.h.inc b/tcg/aarch64/tcg-target-opc.h.inc
129
index XXXXXXX..XXXXXXX 100644
130
--- a/tcg/aarch64/tcg-target-opc.h.inc
131
+++ b/tcg/aarch64/tcg-target-opc.h.inc
132
@@ -XXX,XX +XXX,XX @@
133
* consider these to be UNSPEC with names.
134
*/
135
136
-DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC)
137
-DEF(aa64_sli_vec, 1, 2, 1, IMPLVEC)
138
+DEF(aa64_sshl_vec, 1, 2, 0, TCG_OPF_VECTOR)
139
+DEF(aa64_sli_vec, 1, 2, 1, TCG_OPF_VECTOR)
140
diff --git a/tcg/arm/tcg-target-opc.h.inc b/tcg/arm/tcg-target-opc.h.inc
141
index XXXXXXX..XXXXXXX 100644
142
--- a/tcg/arm/tcg-target-opc.h.inc
143
+++ b/tcg/arm/tcg-target-opc.h.inc
144
@@ -XXX,XX +XXX,XX @@
145
* consider these to be UNSPEC with names.
146
*/
147
148
-DEF(arm_sli_vec, 1, 2, 1, IMPLVEC)
149
-DEF(arm_sshl_vec, 1, 2, 0, IMPLVEC)
150
-DEF(arm_ushl_vec, 1, 2, 0, IMPLVEC)
151
+DEF(arm_sli_vec, 1, 2, 1, TCG_OPF_VECTOR)
152
+DEF(arm_sshl_vec, 1, 2, 0, TCG_OPF_VECTOR)
153
+DEF(arm_ushl_vec, 1, 2, 0, TCG_OPF_VECTOR)
154
diff --git a/tcg/i386/tcg-target-opc.h.inc b/tcg/i386/tcg-target-opc.h.inc
155
index XXXXXXX..XXXXXXX 100644
156
--- a/tcg/i386/tcg-target-opc.h.inc
157
+++ b/tcg/i386/tcg-target-opc.h.inc
158
@@ -XXX,XX +XXX,XX @@
159
* consider these to be UNSPEC with names.
160
*/
161
162
-DEF(x86_shufps_vec, 1, 2, 1, IMPLVEC)
163
-DEF(x86_blend_vec, 1, 2, 1, IMPLVEC)
164
-DEF(x86_packss_vec, 1, 2, 0, IMPLVEC)
165
-DEF(x86_packus_vec, 1, 2, 0, IMPLVEC)
166
-DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
167
-DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
168
-DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
169
-DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
170
-DEF(x86_vpshldi_vec, 1, 2, 1, IMPLVEC)
171
-DEF(x86_vpshldv_vec, 1, 3, 0, IMPLVEC)
172
-DEF(x86_vpshrdv_vec, 1, 3, 0, IMPLVEC)
173
+DEF(x86_shufps_vec, 1, 2, 1, TCG_OPF_VECTOR)
174
+DEF(x86_blend_vec, 1, 2, 1, TCG_OPF_VECTOR)
175
+DEF(x86_packss_vec, 1, 2, 0, TCG_OPF_VECTOR)
176
+DEF(x86_packus_vec, 1, 2, 0, TCG_OPF_VECTOR)
177
+DEF(x86_psrldq_vec, 1, 1, 1, TCG_OPF_VECTOR)
178
+DEF(x86_vperm2i128_vec, 1, 2, 1, TCG_OPF_VECTOR)
179
+DEF(x86_punpckl_vec, 1, 2, 0, TCG_OPF_VECTOR)
180
+DEF(x86_punpckh_vec, 1, 2, 0, TCG_OPF_VECTOR)
181
+DEF(x86_vpshldi_vec, 1, 2, 1, TCG_OPF_VECTOR)
182
+DEF(x86_vpshldv_vec, 1, 3, 0, TCG_OPF_VECTOR)
183
+DEF(x86_vpshrdv_vec, 1, 3, 0, TCG_OPF_VECTOR)
184
diff --git a/tcg/ppc/tcg-target-opc.h.inc b/tcg/ppc/tcg-target-opc.h.inc
185
index XXXXXXX..XXXXXXX 100644
186
--- a/tcg/ppc/tcg-target-opc.h.inc
187
+++ b/tcg/ppc/tcg-target-opc.h.inc
188
@@ -XXX,XX +XXX,XX @@
189
* consider these to be UNSPEC with names.
190
*/
191
192
-DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC)
193
-DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC)
194
-DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
195
-DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
196
-DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
197
-DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
198
+DEF(ppc_mrgh_vec, 1, 2, 0, TCG_OPF_VECTOR)
199
+DEF(ppc_mrgl_vec, 1, 2, 0, TCG_OPF_VECTOR)
200
+DEF(ppc_msum_vec, 1, 3, 0, TCG_OPF_VECTOR)
201
+DEF(ppc_muleu_vec, 1, 2, 0, TCG_OPF_VECTOR)
202
+DEF(ppc_mulou_vec, 1, 2, 0, TCG_OPF_VECTOR)
203
+DEF(ppc_pkum_vec, 1, 2, 0, TCG_OPF_VECTOR)
204
diff --git a/tcg/s390x/tcg-target-opc.h.inc b/tcg/s390x/tcg-target-opc.h.inc
205
index XXXXXXX..XXXXXXX 100644
206
--- a/tcg/s390x/tcg-target-opc.h.inc
207
+++ b/tcg/s390x/tcg-target-opc.h.inc
208
@@ -XXX,XX +XXX,XX @@
209
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
210
* consider these to be UNSPEC with names.
211
*/
212
-DEF(s390_vuph_vec, 1, 1, 0, IMPLVEC)
213
-DEF(s390_vupl_vec, 1, 1, 0, IMPLVEC)
214
-DEF(s390_vpks_vec, 1, 2, 0, IMPLVEC)
215
+DEF(s390_vuph_vec, 1, 1, 0, TCG_OPF_VECTOR)
216
+DEF(s390_vupl_vec, 1, 1, 0, TCG_OPF_VECTOR)
217
+DEF(s390_vpks_vec, 1, 2, 0, TCG_OPF_VECTOR)
218
--
219
2.43.0
220
221
diff view generated by jsdifflib
1
From: Luis Pires <luis.pires@eldorado.org.br>
1
We always provide bswap subroutines, whether they are optimized
2
using mips32r2 when available or not.
2
3
3
In preparation for changing the divu128/divs128 implementations
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
to allow for quotients larger than 64 bits, move the div-by-zero
5
and overflow checks to the callers.
6
7
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-Id: <20211025191154.350831-2-luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
6
---
12
include/hw/clock.h | 5 +++--
7
tcg/mips/tcg-target-has.h | 8 ++++----
13
include/qemu/host-utils.h | 34 ++++++++++++---------------------
8
1 file changed, 4 insertions(+), 4 deletions(-)
14
target/ppc/int_helper.c | 14 +++++++++-----
15
util/host-utils.c | 40 ++++++++++++++++++---------------------
16
4 files changed, 42 insertions(+), 51 deletions(-)
17
9
18
diff --git a/include/hw/clock.h b/include/hw/clock.h
10
diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h
19
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/clock.h
12
--- a/tcg/mips/tcg-target-has.h
21
+++ b/include/hw/clock.h
13
+++ b/tcg/mips/tcg-target-has.h
22
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
14
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
23
return 0;
15
#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
24
}
16
#define TCG_TARGET_HAS_muluh_i32 1
25
/*
17
#define TCG_TARGET_HAS_mulsh_i32 1
26
- * Ignore divu128() return value as we've caught div-by-zero and don't
18
+#define TCG_TARGET_HAS_bswap16_i32 1
27
- * need different behaviour for overflow.
19
#define TCG_TARGET_HAS_bswap32_i32 1
28
+ * BUG: when CONFIG_INT128 is not defined, the current implementation of
20
#define TCG_TARGET_HAS_negsetcond_i32 0
29
+ * divu128 does not return a valid truncated quotient, so the result will
21
30
+ * be wrong.
22
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
31
*/
32
divu128(&lo, &hi, clk->period);
33
return lo;
34
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/qemu/host-utils.h
37
+++ b/include/qemu/host-utils.h
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
39
return (__int128_t)a * b / c;
40
}
41
42
-static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
43
+static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
44
{
45
- if (divisor == 0) {
46
- return 1;
47
- } else {
48
- __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
49
- __uint128_t result = dividend / divisor;
50
- *plow = result;
51
- *phigh = dividend % divisor;
52
- return result > UINT64_MAX;
53
- }
54
+ __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
55
+ __uint128_t result = dividend / divisor;
56
+ *plow = result;
57
+ *phigh = dividend % divisor;
58
}
59
60
-static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
61
+static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
62
{
63
- if (divisor == 0) {
64
- return 1;
65
- } else {
66
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
67
- __int128_t result = dividend / divisor;
68
- *plow = result;
69
- *phigh = dividend % divisor;
70
- return result != *plow;
71
- }
72
+ __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
73
+ __int128_t result = dividend / divisor;
74
+ *plow = result;
75
+ *phigh = dividend % divisor;
76
}
77
#else
78
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
79
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
80
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
81
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
82
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
83
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
84
85
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
86
{
87
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/ppc/int_helper.c
90
+++ b/target/ppc/int_helper.c
91
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
92
uint64_t rt = 0;
93
int overflow = 0;
94
95
- overflow = divu128(&rt, &ra, rb);
96
-
97
- if (unlikely(overflow)) {
98
+ if (unlikely(rb == 0 || ra >= rb)) {
99
+ overflow = 1;
100
rt = 0; /* Undefined */
101
+ } else {
102
+ divu128(&rt, &ra, rb);
103
}
104
105
if (oe) {
106
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
107
int64_t rt = 0;
108
int64_t ra = (int64_t)rau;
109
int64_t rb = (int64_t)rbu;
110
- int overflow = divs128(&rt, &ra, rb);
111
+ int overflow = 0;
112
113
- if (unlikely(overflow)) {
114
+ if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
115
+ overflow = 1;
116
rt = 0; /* Undefined */
117
+ } else {
118
+ divs128(&rt, &ra, rb);
119
}
120
121
if (oe) {
122
diff --git a/util/host-utils.c b/util/host-utils.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/util/host-utils.c
125
+++ b/util/host-utils.c
126
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
127
*phigh = rh;
128
}
129
130
-/* Unsigned 128x64 division. Returns 1 if overflow (divide by zero or */
131
-/* quotient exceeds 64 bits). Otherwise returns quotient via plow and */
132
-/* remainder via phigh. */
133
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
134
+/*
135
+ * Unsigned 128-by-64 division. Returns quotient via plow and
136
+ * remainder via phigh.
137
+ * The result must fit in 64 bits (plow) - otherwise, the result
138
+ * is undefined.
139
+ * This function will cause a division by zero if passed a zero divisor.
140
+ */
141
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
142
{
143
uint64_t dhi = *phigh;
144
uint64_t dlo = *plow;
145
unsigned i;
146
uint64_t carry = 0;
147
148
- if (divisor == 0) {
149
- return 1;
150
- } else if (dhi == 0) {
151
+ if (divisor == 0 || dhi == 0) {
152
*plow = dlo / divisor;
153
*phigh = dlo % divisor;
154
- return 0;
155
- } else if (dhi >= divisor) {
156
- return 1;
157
} else {
158
159
for (i = 0; i < 64; i++) {
160
@@ -XXX,XX +XXX,XX @@ int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
161
162
*plow = dlo;
163
*phigh = dhi;
164
- return 0;
165
}
166
}
167
168
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
169
+/*
170
+ * Signed 128-by-64 division. Returns quotient via plow and
171
+ * remainder via phigh.
172
+ * The result must fit in 64 bits (plow) - otherwise, the result
173
+ * is undefined.
174
+ * This function will cause a division by zero if passed a zero divisor.
175
+ */
176
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
177
{
178
int sgn_dvdnd = *phigh < 0;
179
int sgn_divsr = divisor < 0;
180
- int overflow = 0;
181
182
if (sgn_dvdnd) {
183
*plow = ~(*plow);
184
@@ -XXX,XX +XXX,XX @@ int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
185
divisor = 0 - divisor;
186
}
187
188
- overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
189
+ divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
190
191
if (sgn_dvdnd ^ sgn_divsr) {
192
*plow = 0 - *plow;
193
}
194
-
195
- if (!overflow) {
196
- if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
197
- overflow = 1;
198
- }
199
- }
200
-
201
- return overflow;
202
}
203
#endif
23
#endif
204
24
25
/* optional instructions detected at runtime */
26
-#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
27
#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
28
#define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions
29
#define TCG_TARGET_HAS_sextract_i32 0
30
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
31
#define TCG_TARGET_HAS_qemu_st8_i32 0
32
33
#if TCG_TARGET_REG_BITS == 64
34
-#define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions
35
-#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions
36
-#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions
37
+#define TCG_TARGET_HAS_bswap16_i64 1
38
+#define TCG_TARGET_HAS_bswap32_i64 1
39
+#define TCG_TARGET_HAS_bswap64_i64 1
40
#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
41
#define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions
42
#define TCG_TARGET_HAS_sextract_i64 0
205
--
43
--
206
2.25.1
44
2.43.0
207
45
208
46
diff view generated by jsdifflib
1
The results are generally 6 bit unsigned values, though
1
When we generalize {s}extract_i32, we'll lose the
2
the count leading and trailing bits may produce any value
2
specific register constraints on ext8u and ext8s.
3
for a zero input.
3
It's just as easy to emit a couple of insns instead.
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 3 ++-
8
tcg/i386/tcg-target.c.inc | 23 +++++++++++++++++++----
10
1 file changed, 2 insertions(+), 1 deletion(-)
9
1 file changed, 19 insertions(+), 4 deletions(-)
11
10
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
13
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/tcg/optimize.c
14
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_rolw_8(TCGContext *s, int reg)
17
g_assert_not_reached();
16
18
}
17
static void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src)
19
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
18
{
20
-
19
- /* movzbl */
21
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
20
- tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
22
return false;
21
+ if (TCG_TARGET_REG_BITS == 32 && src >= 4) {
22
+ tcg_out_mov(s, TCG_TYPE_I32, dest, src);
23
+ if (dest >= 4) {
24
+ tcg_out_modrm(s, OPC_ARITH_EvIz, ARITH_AND, dest);
25
+ tcg_out32(s, 0xff);
26
+ return;
27
+ }
28
+ src = dest;
29
+ }
30
tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
23
}
31
}
24
32
25
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
33
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
26
default:
34
{
27
g_assert_not_reached();
35
int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
28
}
36
- /* movsbl */
29
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
37
- tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
30
return false;
38
+
39
+ if (TCG_TARGET_REG_BITS == 32 && src >= 4) {
40
+ tcg_out_mov(s, TCG_TYPE_I32, dest, src);
41
+ if (dest >= 4) {
42
+ tcg_out_shifti(s, SHIFT_SHL, dest, 24);
43
+ tcg_out_shifti(s, SHIFT_SAR, dest, 24);
44
+ return;
45
+ }
46
+ src = dest;
47
+ }
48
tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
31
}
49
}
32
50
33
--
51
--
34
2.25.1
52
2.43.0
35
53
36
54
diff view generated by jsdifflib
1
Break the final cleanup clause out of the main switch
1
Accept byte and word extensions with the extract opcodes.
2
statement. When fully folding an opcode to mov/movi,
2
This is preparatory to removing the specialized extracts.
3
use "continue" to process the next opcode, else break
3
4
to fall into the final cleanup.
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
6
---
11
tcg/optimize.c | 190 ++++++++++++++++++++++++-------------------------
7
tcg/i386/tcg-target-has.h | 49 +++++++++++++++++++++++++++----
12
1 file changed, 94 insertions(+), 96 deletions(-)
8
tcg/tcg-has.h | 12 +++++---
13
9
tcg/optimize.c | 8 +++--
10
tcg/tcg-op.c | 12 +++-----
11
tcg/i386/tcg-target.c.inc | 62 +++++++++++++++++++++++++++++----------
12
5 files changed, 107 insertions(+), 36 deletions(-)
13
14
diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/i386/tcg-target-has.h
17
+++ b/tcg/i386/tcg-target-has.h
18
@@ -XXX,XX +XXX,XX @@
19
#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
20
#define TCG_TARGET_HAS_deposit_i64 1
21
#define TCG_TARGET_HAS_extract_i64 1
22
-#define TCG_TARGET_HAS_sextract_i64 0
23
+#define TCG_TARGET_HAS_sextract_i64 1
24
#define TCG_TARGET_HAS_extract2_i64 1
25
#define TCG_TARGET_HAS_negsetcond_i64 1
26
#define TCG_TARGET_HAS_add2_i64 1
27
@@ -XXX,XX +XXX,XX @@
28
(TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
29
#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
30
31
-/* Check for the possibility of high-byte extraction and, for 64-bit,
32
- zero-extending 32-bit right-shift. */
33
-#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
34
-#define TCG_TARGET_extract_i64_valid(ofs, len) \
35
- (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
36
+/*
37
+ * Check for the possibility of low byte/word extraction, high-byte extraction
38
+ * and zero-extending 32-bit right-shift.
39
+ *
40
+ * We cannot sign-extend from high byte to 64-bits without using the
41
+ * REX prefix that explicitly excludes access to the high-byte registers.
42
+ */
43
+static inline bool
44
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
45
+{
46
+ switch (ofs) {
47
+ case 0:
48
+ switch (len) {
49
+ case 8:
50
+ case 16:
51
+ return true;
52
+ case 32:
53
+ return type == TCG_TYPE_I64;
54
+ }
55
+ return false;
56
+ case 8:
57
+ return len == 8 && type == TCG_TYPE_I32;
58
+ }
59
+ return false;
60
+}
61
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
62
+
63
+static inline bool
64
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
65
+{
66
+ if (type == TCG_TYPE_I64 && ofs + len == 32) {
67
+ return true;
68
+ }
69
+ switch (ofs) {
70
+ case 0:
71
+ return len == 8 || len == 16;
72
+ case 8:
73
+ return len == 8;
74
+ }
75
+ return false;
76
+}
77
+#define TCG_TARGET_extract_valid tcg_target_extract_valid
78
79
#endif
80
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/tcg-has.h
83
+++ b/tcg/tcg-has.h
84
@@ -XXX,XX +XXX,XX @@
85
#ifndef TCG_TARGET_deposit_i64_valid
86
#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
87
#endif
88
-#ifndef TCG_TARGET_extract_i32_valid
89
-#define TCG_TARGET_extract_i32_valid(ofs, len) 1
90
+#ifndef TCG_TARGET_extract_valid
91
+#define TCG_TARGET_extract_valid(type, ofs, len) \
92
+ ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_extract_i32 \
93
+ : TCG_TARGET_HAS_extract_i64)
94
#endif
95
-#ifndef TCG_TARGET_extract_i64_valid
96
-#define TCG_TARGET_extract_i64_valid(ofs, len) 1
97
+#ifndef TCG_TARGET_sextract_valid
98
+#define TCG_TARGET_sextract_valid(type, ofs, len) \
99
+ ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_sextract_i32 \
100
+ : TCG_TARGET_HAS_sextract_i64)
101
#endif
102
103
/* Only one of DIV or DIV2 should be defined. */
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
104
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
105
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
106
--- a/tcg/optimize.c
17
+++ b/tcg/optimize.c
107
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
108
@@ -XXX,XX +XXX,XX @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
19
switch (opc) {
109
xor_opc = INDEX_op_xor_i32;
20
CASE_OP_32_64_VEC(mov):
110
shr_opc = INDEX_op_shr_i32;
21
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
111
neg_opc = INDEX_op_neg_i32;
22
- break;
112
- if (TCG_TARGET_extract_i32_valid(sh, 1)) {
23
+ continue;
113
+ if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) {
24
114
uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
25
case INDEX_op_dup_vec:
115
+ }
26
if (arg_is_const(op->args[1])) {
116
+ if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) {
27
tmp = arg_info(op->args[1])->val;
117
sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
28
tmp = dup_const(TCGOP_VECE(op), tmp);
118
}
29
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
119
break;
30
- break;
120
@@ -XXX,XX +XXX,XX @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
31
+ continue;
121
xor_opc = INDEX_op_xor_i64;
32
}
122
shr_opc = INDEX_op_shr_i64;
33
- goto do_default;
123
neg_opc = INDEX_op_neg_i64;
34
+ break;
124
- if (TCG_TARGET_extract_i64_valid(sh, 1)) {
35
125
+ if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) {
36
case INDEX_op_dup2_vec:
126
uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
37
assert(TCG_TARGET_REG_BITS == 32);
127
+ }
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
128
+ if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) {
39
tcg_opt_gen_movi(s, &ctx, op, op->args[0],
129
sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
40
deposit64(arg_info(op->args[1])->val, 32, 32,
130
}
41
arg_info(op->args[2])->val));
131
break;
42
- break;
132
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
43
+ continue;
133
index XXXXXXX..XXXXXXX 100644
44
} else if (args_are_copies(op->args[1], op->args[2])) {
134
--- a/tcg/tcg-op.c
45
op->opc = INDEX_op_dup_vec;
135
+++ b/tcg/tcg-op.c
46
TCGOP_VECE(op) = MO_32;
136
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
47
nb_iargs = 1;
137
return;
48
}
138
}
49
- goto do_default;
139
50
+ break;
140
- if (TCG_TARGET_HAS_extract_i32
51
141
- && TCG_TARGET_extract_i32_valid(ofs, len)) {
52
CASE_OP_32_64(not):
142
+ if (TCG_TARGET_extract_valid(TCG_TYPE_I32, ofs, len)) {
53
CASE_OP_32_64(neg):
143
tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
144
return;
55
if (arg_is_const(op->args[1])) {
145
}
56
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
146
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
57
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
147
}
58
- break;
148
}
59
+ continue;
149
60
}
150
- if (TCG_TARGET_HAS_sextract_i32
61
- goto do_default;
151
- && TCG_TARGET_extract_i32_valid(ofs, len)) {
62
+ break;
152
+ if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, ofs, len)) {
63
153
tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
64
CASE_OP_32_64(bswap16):
154
return;
65
CASE_OP_32_64(bswap32):
155
}
66
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
156
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
67
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
157
goto do_shift_and;
68
op->args[2]);
158
}
69
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
159
70
- break;
160
- if (TCG_TARGET_HAS_extract_i64
71
+ continue;
161
- && TCG_TARGET_extract_i64_valid(ofs, len)) {
72
}
162
+ if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) {
73
- goto do_default;
163
tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
74
+ break;
164
return;
75
165
}
76
CASE_OP_32_64(add):
166
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
77
CASE_OP_32_64(sub):
167
return;
78
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
168
}
79
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
169
80
arg_info(op->args[2])->val);
170
- if (TCG_TARGET_HAS_sextract_i64
81
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
171
- && TCG_TARGET_extract_i64_valid(ofs, len)) {
82
- break;
172
+ if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, ofs, len)) {
83
+ continue;
173
tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
84
}
174
return;
85
- goto do_default;
175
}
86
+ break;
176
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
87
177
index XXXXXXX..XXXXXXX 100644
88
CASE_OP_32_64(clz):
178
--- a/tcg/i386/tcg-target.c.inc
89
CASE_OP_32_64(ctz):
179
+++ b/tcg/i386/tcg-target.c.inc
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
180
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
91
} else {
181
92
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
182
case INDEX_op_extract_i64:
93
}
183
if (a2 + args[3] == 32) {
94
- break;
184
+ if (a2 == 0) {
95
+ continue;
185
+ tcg_out_ext32u(s, a0, a1);
96
}
97
- goto do_default;
98
+ break;
99
100
CASE_OP_32_64(deposit):
101
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
102
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
103
op->args[3], op->args[4],
104
arg_info(op->args[2])->val);
105
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
106
- break;
107
+ continue;
108
}
109
- goto do_default;
110
+ break;
111
112
CASE_OP_32_64(extract):
113
if (arg_is_const(op->args[1])) {
114
tmp = extract64(arg_info(op->args[1])->val,
115
op->args[2], op->args[3]);
116
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
117
- break;
118
+ continue;
119
}
120
- goto do_default;
121
+ break;
122
123
CASE_OP_32_64(sextract):
124
if (arg_is_const(op->args[1])) {
125
tmp = sextract64(arg_info(op->args[1])->val,
126
op->args[2], op->args[3]);
127
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
128
- break;
129
+ continue;
130
}
131
- goto do_default;
132
+ break;
133
134
CASE_OP_32_64(extract2):
135
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
136
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
137
((uint32_t)v2 << (32 - shr)));
138
}
139
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
140
- break;
141
+ continue;
142
}
143
- goto do_default;
144
+ break;
145
146
CASE_OP_32_64(setcond):
147
tmp = do_constant_folding_cond(opc, op->args[1],
148
op->args[2], op->args[3]);
149
if (tmp != 2) {
150
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
151
- break;
152
+ continue;
153
}
154
- goto do_default;
155
+ break;
156
157
CASE_OP_32_64(brcond):
158
tmp = do_constant_folding_cond(opc, op->args[0],
159
op->args[1], op->args[2]);
160
- if (tmp != 2) {
161
- if (tmp) {
162
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
163
- op->opc = INDEX_op_br;
164
- op->args[0] = op->args[3];
165
- } else {
166
- tcg_op_remove(s, op);
167
- }
168
+ switch (tmp) {
169
+ case 0:
170
+ tcg_op_remove(s, op);
171
+ continue;
172
+ case 1:
173
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
174
+ op->opc = opc = INDEX_op_br;
175
+ op->args[0] = op->args[3];
176
break;
177
}
178
- goto do_default;
179
+ break;
180
181
CASE_OP_32_64(movcond):
182
tmp = do_constant_folding_cond(opc, op->args[1],
183
op->args[2], op->args[5]);
184
if (tmp != 2) {
185
tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
186
- break;
187
+ continue;
188
}
189
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
190
uint64_t tv = arg_info(op->args[3])->val;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
if (fv == 1 && tv == 0) {
193
cond = tcg_invert_cond(cond);
194
} else if (!(tv == 1 && fv == 0)) {
195
- goto do_default;
196
+ break;
197
}
198
op->args[3] = cond;
199
op->opc = opc = (opc == INDEX_op_movcond_i32
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
: INDEX_op_setcond_i64);
202
nb_iargs = 2;
203
}
204
- goto do_default;
205
+ break;
206
207
case INDEX_op_add2_i32:
208
case INDEX_op_sub2_i32:
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
rh = op->args[1];
211
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
212
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
213
- break;
214
+ continue;
215
}
216
- goto do_default;
217
+ break;
218
219
case INDEX_op_mulu2_i32:
220
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
221
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
222
rh = op->args[1];
223
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
224
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
225
- break;
226
+ continue;
227
}
228
- goto do_default;
229
+ break;
230
231
case INDEX_op_brcond2_i32:
232
tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
233
op->args[4]);
234
- if (tmp != 2) {
235
- if (tmp) {
236
- do_brcond_true:
237
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
238
- op->opc = INDEX_op_br;
239
- op->args[0] = op->args[5];
240
- } else {
241
+ if (tmp == 0) {
242
do_brcond_false:
243
- tcg_op_remove(s, op);
244
- }
245
- } else if ((op->args[4] == TCG_COND_LT
246
- || op->args[4] == TCG_COND_GE)
247
- && arg_is_const(op->args[2])
248
- && arg_info(op->args[2])->val == 0
249
- && arg_is_const(op->args[3])
250
- && arg_info(op->args[3])->val == 0) {
251
+ tcg_op_remove(s, op);
252
+ continue;
253
+ }
254
+ if (tmp == 1) {
255
+ do_brcond_true:
256
+ op->opc = opc = INDEX_op_br;
257
+ op->args[0] = op->args[5];
258
+ break;
186
+ break;
259
+ }
187
+ }
260
+ if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
188
/* This is a 32-bit zero-extending right shift. */
261
+ && arg_is_const(op->args[2])
189
tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
262
+ && arg_info(op->args[2])->val == 0
190
tcg_out_shifti(s, SHIFT_SHR, a0, a2);
263
+ && arg_is_const(op->args[3])
191
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
264
+ && arg_info(op->args[3])->val == 0) {
192
}
265
/* Simplify LT/GE comparisons vs zero to a single compare
193
/* FALLTHRU */
266
vs the high word of the input. */
194
case INDEX_op_extract_i32:
267
do_brcond_high:
195
- /* On the off-chance that we can use the high-byte registers.
268
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
196
- Otherwise we emit the same ext16 + shift pattern that we
269
- op->opc = INDEX_op_brcond_i32;
197
- would have gotten from the normal tcg-op.c expansion. */
270
+ op->opc = opc = INDEX_op_brcond_i32;
198
- tcg_debug_assert(a2 == 8 && args[3] == 8);
271
op->args[0] = op->args[1];
199
- if (a1 < 4 && a0 < 8) {
272
op->args[1] = op->args[3];
200
- tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
273
op->args[2] = op->args[4];
201
- } else {
274
op->args[3] = op->args[5];
202
+ if (a2 == 0 && args[3] == 8) {
275
- } else if (op->args[4] == TCG_COND_EQ) {
203
+ tcg_out_ext8u(s, a0, a1);
276
+ break;
204
+ } else if (a2 == 0 && args[3] == 16) {
205
tcg_out_ext16u(s, a0, a1);
206
- tcg_out_shifti(s, SHIFT_SHR, a0, 8);
207
+ } else if (a2 == 8 && args[3] == 8) {
208
+ /*
209
+ * On the off-chance that we can use the high-byte registers.
210
+ * Otherwise we emit the same ext16 + shift pattern that we
211
+ * would have gotten from the normal tcg-op.c expansion.
212
+ */
213
+ if (a1 < 4 && a0 < 8) {
214
+ tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
215
+ } else {
216
+ tcg_out_ext16u(s, a0, a1);
217
+ tcg_out_shifti(s, SHIFT_SHR, a0, 8);
277
+ }
218
+ }
278
+ if (op->args[4] == TCG_COND_EQ) {
219
+ } else {
279
/* Simplify EQ comparisons where one of the pairs
220
+ g_assert_not_reached();
280
can be simplified. */
221
+ }
281
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
222
+ break;
282
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
223
+
283
if (tmp == 0) {
224
+ case INDEX_op_sextract_i64:
284
goto do_brcond_false;
225
+ if (a2 == 0 && args[3] == 8) {
285
} else if (tmp != 1) {
226
+ tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1);
286
- goto do_default;
227
+ } else if (a2 == 0 && args[3] == 16) {
287
+ break;
228
+ tcg_out_ext16s(s, TCG_TYPE_I64, a0, a1);
288
}
229
+ } else if (a2 == 0 && args[3] == 32) {
289
do_brcond_low:
230
+ tcg_out_ext32s(s, a0, a1);
290
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
231
+ } else {
291
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
+ g_assert_not_reached();
292
op->args[1] = op->args[2];
233
}
293
op->args[2] = op->args[4];
234
break;
294
op->args[3] = op->args[5];
235
295
- } else if (op->args[4] == TCG_COND_NE) {
236
case INDEX_op_sextract_i32:
296
+ break;
237
- /* We don't implement sextract_i64, as we cannot sign-extend to
238
- 64-bits without using the REX prefix that explicitly excludes
239
- access to the high-byte registers. */
240
- tcg_debug_assert(a2 == 8 && args[3] == 8);
241
- if (a1 < 4 && a0 < 8) {
242
- tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
243
- } else {
244
+ if (a2 == 0 && args[3] == 8) {
245
+ tcg_out_ext8s(s, TCG_TYPE_I32, a0, a1);
246
+ } else if (a2 == 0 && args[3] == 16) {
247
tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
248
- tcg_out_shifti(s, SHIFT_SAR, a0, 8);
249
+ } else if (a2 == 8 && args[3] == 8) {
250
+ if (a1 < 4 && a0 < 8) {
251
+ tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
252
+ } else {
253
+ tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
254
+ tcg_out_shifti(s, SHIFT_SAR, a0, 8);
297
+ }
255
+ }
298
+ if (op->args[4] == TCG_COND_NE) {
299
/* Simplify NE comparisons where one of the pairs
300
can be simplified. */
301
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
302
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
303
} else if (tmp == 1) {
304
goto do_brcond_true;
305
}
306
- goto do_default;
307
- } else {
308
- goto do_default;
309
}
310
break;
311
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (tmp != 2) {
314
do_setcond_const:
315
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
316
- } else if ((op->args[5] == TCG_COND_LT
317
- || op->args[5] == TCG_COND_GE)
318
- && arg_is_const(op->args[3])
319
- && arg_info(op->args[3])->val == 0
320
- && arg_is_const(op->args[4])
321
- && arg_info(op->args[4])->val == 0) {
322
+ continue;
323
+ }
324
+ if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
325
+ && arg_is_const(op->args[3])
326
+ && arg_info(op->args[3])->val == 0
327
+ && arg_is_const(op->args[4])
328
+ && arg_info(op->args[4])->val == 0) {
329
/* Simplify LT/GE comparisons vs zero to a single compare
330
vs the high word of the input. */
331
do_setcond_high:
332
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
333
op->args[1] = op->args[2];
334
op->args[2] = op->args[4];
335
op->args[3] = op->args[5];
336
- } else if (op->args[5] == TCG_COND_EQ) {
337
+ break;
338
+ }
339
+ if (op->args[5] == TCG_COND_EQ) {
340
/* Simplify EQ comparisons where one of the pairs
341
can be simplified. */
342
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
343
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
344
if (tmp == 0) {
345
goto do_setcond_high;
346
} else if (tmp != 1) {
347
- goto do_default;
348
+ break;
349
}
350
do_setcond_low:
351
reset_temp(op->args[0]);
352
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
353
op->opc = INDEX_op_setcond_i32;
354
op->args[2] = op->args[3];
355
op->args[3] = op->args[5];
356
- } else if (op->args[5] == TCG_COND_NE) {
357
+ break;
358
+ }
359
+ if (op->args[5] == TCG_COND_NE) {
360
/* Simplify NE comparisons where one of the pairs
361
can be simplified. */
362
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
363
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
364
} else if (tmp == 1) {
365
goto do_setcond_const;
366
}
367
- goto do_default;
368
- } else {
369
- goto do_default;
370
}
371
break;
372
373
- case INDEX_op_call:
374
- if (!(tcg_call_flags(op)
375
+ default:
376
+ break;
377
+ }
378
+
379
+ /* Some of the folding above can change opc. */
380
+ opc = op->opc;
381
+ def = &tcg_op_defs[opc];
382
+ if (def->flags & TCG_OPF_BB_END) {
383
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
384
+ } else {
256
+ } else {
385
+ if (opc == INDEX_op_call &&
257
+ g_assert_not_reached();
386
+ !(tcg_call_flags(op)
258
}
387
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
259
break;
388
for (i = 0; i < nb_globals; i++) {
260
389
if (test_bit(i, ctx.temps_used.l)) {
261
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
390
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
262
case INDEX_op_extract_i32:
391
}
263
case INDEX_op_extract_i64:
392
}
264
case INDEX_op_sextract_i32:
393
}
265
+ case INDEX_op_sextract_i64:
394
- goto do_reset_output;
266
case INDEX_op_ctpop_i32:
395
267
case INDEX_op_ctpop_i64:
396
- default:
268
return C_O1_I1(r, r);
397
- do_default:
398
- /* Default case: we know nothing about operation (or were unable
399
- to compute the operation result) so no propagation is done.
400
- We trash everything if the operation is the end of a basic
401
- block, otherwise we only trash the output args. "z_mask" is
402
- the non-zero bits mask for the first output arg. */
403
- if (def->flags & TCG_OPF_BB_END) {
404
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
405
- } else {
406
- do_reset_output:
407
- for (i = 0; i < nb_oargs; i++) {
408
- reset_temp(op->args[i]);
409
- /* Save the corresponding known-zero bits mask for the
410
- first output argument (only one supported so far). */
411
- if (i == 0) {
412
- arg_info(op->args[i])->z_mask = z_mask;
413
- }
414
+ for (i = 0; i < nb_oargs; i++) {
415
+ reset_temp(op->args[i]);
416
+ /* Save the corresponding known-zero bits mask for the
417
+ first output argument (only one supported so far). */
418
+ if (i == 0) {
419
+ arg_info(op->args[i])->z_mask = z_mask;
420
}
421
}
422
- break;
423
}
424
425
/* Eliminate duplicate and redundant fence instructions. */
426
--
269
--
427
2.25.1
270
2.43.0
428
271
429
272
diff view generated by jsdifflib
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
1
Trivially mirrors TCG_TARGET_HAS_{s}extract_*.
2
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
tcg/optimize.c | 9 ++++++---
6
tcg/aarch64/tcg-target-has.h | 3 +++
7
1 file changed, 6 insertions(+), 3 deletions(-)
7
1 file changed, 3 insertions(+)
8
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h
10
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
11
--- a/tcg/aarch64/tcg-target-has.h
12
+++ b/tcg/optimize.c
12
+++ b/tcg/aarch64/tcg-target-has.h
13
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
13
@@ -XXX,XX +XXX,XX @@
14
uint64_t z_mask, partmask, affected, tmp;
14
#define TCG_TARGET_HAS_cmpsel_vec 0
15
TCGOpcode opc = op->opc;
15
#define TCG_TARGET_HAS_tst_vec 1
16
const TCGOpDef *def;
16
17
+ bool done = false;
17
+#define TCG_TARGET_extract_valid(type, ofs, len) 1
18
18
+#define TCG_TARGET_sextract_valid(type, ofs, len) 1
19
/* Calls are special. */
19
+
20
if (opc == INDEX_op_call) {
20
#endif
21
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
22
allocator where needed and possible. Also detect copies. */
23
switch (opc) {
24
CASE_OP_32_64_VEC(mov):
25
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
26
- continue;
27
+ done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
28
+ break;
29
30
case INDEX_op_dup_vec:
31
if (arg_is_const(op->args[1])) {
32
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
33
break;
34
}
35
36
- finish_folding(&ctx, op);
37
+ if (!done) {
38
+ finish_folding(&ctx, op);
39
+ }
40
41
/* Eliminate duplicate and redundant fence instructions. */
42
if (ctx.prev_mb) {
43
--
21
--
44
2.25.1
22
2.43.0
45
23
46
24
diff view generated by jsdifflib
1
Copy z_mask into OptContext, for writeback to the
1
We're about to change canonicalization of masks as extract
2
first output within the new function.
2
instead of and. Retain the andi expansion here.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 49 +++++++++++++++++++++++++++++++++----------------
7
tcg/aarch64/tcg-target.c.inc | 7 ++++++-
9
1 file changed, 33 insertions(+), 16 deletions(-)
8
1 file changed, 6 insertions(+), 1 deletion(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/optimize.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
16
TCGContext *tcg;
15
17
TCGOp *prev_mb;
16
case INDEX_op_extract_i64:
18
TCGTempSet temps_used;
17
case INDEX_op_extract_i32:
19
+
18
- tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
20
+ /* In flight values from optimization. */
19
+ if (a2 == 0) {
21
+ uint64_t z_mask;
20
+ uint64_t mask = MAKE_64BIT_MASK(0, args[3]);
22
} OptContext;
21
+ tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, mask);
23
22
+ } else {
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
23
+ tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
25
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
26
}
27
}
28
29
+static void finish_folding(OptContext *ctx, TCGOp *op)
30
+{
31
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
32
+ int i, nb_oargs;
33
+
34
+ /*
35
+ * For an opcode that ends a BB, reset all temp data.
36
+ * We do no cross-BB optimization.
37
+ */
38
+ if (def->flags & TCG_OPF_BB_END) {
39
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
40
+ ctx->prev_mb = NULL;
41
+ return;
42
+ }
43
+
44
+ nb_oargs = def->nb_oargs;
45
+ for (i = 0; i < nb_oargs; i++) {
46
+ reset_temp(op->args[i]);
47
+ /*
48
+ * Save the corresponding known-zero bits mask for the
49
+ * first output argument (only one supported so far).
50
+ */
51
+ if (i == 0) {
52
+ arg_info(op->args[i])->z_mask = ctx->z_mask;
53
+ }
24
+ }
54
+ }
25
break;
55
+}
26
56
+
27
case INDEX_op_sextract_i64:
57
static bool fold_call(OptContext *ctx, TCGOp *op)
58
{
59
TCGContext *s = ctx->tcg;
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
partmask &= 0xffffffffu;
62
affected &= 0xffffffffu;
63
}
64
+ ctx.z_mask = z_mask;
65
66
if (partmask == 0) {
67
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
break;
70
}
71
72
- /* Some of the folding above can change opc. */
73
- opc = op->opc;
74
- def = &tcg_op_defs[opc];
75
- if (def->flags & TCG_OPF_BB_END) {
76
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
77
- } else {
78
- int nb_oargs = def->nb_oargs;
79
- for (i = 0; i < nb_oargs; i++) {
80
- reset_temp(op->args[i]);
81
- /* Save the corresponding known-zero bits mask for the
82
- first output argument (only one supported so far). */
83
- if (i == 0) {
84
- arg_info(op->args[i])->z_mask = z_mask;
85
- }
86
- }
87
- }
88
+ finish_folding(&ctx, op);
89
90
/* Eliminate duplicate and redundant fence instructions. */
91
if (ctx.prev_mb) {
92
--
28
--
93
2.25.1
29
2.43.0
94
30
95
31
diff view generated by jsdifflib
1
Pull the "op r, a, a => mov r, a" optimization into a function,
1
The armv6 uxt and sxt opcodes have a 2-bit rotate field
2
and use it in the outer opcode fold functions.
2
which supports extractions from ofs = {0,8,16,24}.
3
Special case ofs = 0, len <= 8 as AND.
3
4
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 39 ++++++++++++++++++++++++---------------
8
tcg/arm/tcg-target-has.h | 21 ++++++++++++++--
9
1 file changed, 24 insertions(+), 15 deletions(-)
9
tcg/arm/tcg-target.c.inc | 54 +++++++++++++++++++++++++++++++++++-----
10
2 files changed, 67 insertions(+), 8 deletions(-)
10
11
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
--- a/tcg/arm/tcg-target-has.h
14
+++ b/tcg/optimize.c
15
+++ b/tcg/arm/tcg-target-has.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
16
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
16
return false;
17
#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
17
}
18
#define TCG_TARGET_HAS_ctpop_i32 0
18
19
#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
19
+/* If the binary operation has both arguments equal, fold to identity. */
20
-#define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
20
+static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
21
-#define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
22
+#define TCG_TARGET_HAS_extract_i32 1
23
+#define TCG_TARGET_HAS_sextract_i32 1
24
#define TCG_TARGET_HAS_extract2_i32 1
25
#define TCG_TARGET_HAS_negsetcond_i32 1
26
#define TCG_TARGET_HAS_mulu2_i32 1
27
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
28
#define TCG_TARGET_HAS_cmpsel_vec 0
29
#define TCG_TARGET_HAS_tst_vec 1
30
31
+static inline bool
32
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
21
+{
33
+{
22
+ if (args_are_copies(op->args[1], op->args[2])) {
34
+ if (use_armv7_instructions) {
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
35
+ return true; /* SBFX or UBFX */
36
+ }
37
+ switch (len) {
38
+ case 8: /* SXTB or UXTB */
39
+ case 16: /* SXTH or UXTH */
40
+ return (ofs % 8) == 0;
24
+ }
41
+ }
25
+ return false;
42
+ return false;
26
+}
43
+}
27
+
44
+
28
/*
45
+#define TCG_TARGET_extract_valid tcg_target_extract_valid
29
* These outermost fold_<op> functions are sorted alphabetically.
46
+#define TCG_TARGET_sextract_valid tcg_target_extract_valid
30
+ *
47
+
31
+ * The ordering of the transformations should be:
48
#endif
32
+ * 1) those that produce a constant
49
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
33
+ * 2) those that produce a copy
50
index XXXXXXX..XXXXXXX 100644
34
+ * 3) those that produce information about the result value.
51
--- a/tcg/arm/tcg-target.c.inc
35
*/
52
+++ b/tcg/arm/tcg-target.c.inc
36
53
@@ -XXX,XX +XXX,XX @@ static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd,
37
static bool fold_add(OptContext *ctx, TCGOp *op)
54
static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd,
38
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
55
TCGReg rn, int ofs, int len)
39
40
static bool fold_and(OptContext *ctx, TCGOp *op)
41
{
56
{
42
- return fold_const2(ctx, op);
57
- /* ubfx */
43
+ if (fold_const2(ctx, op) ||
58
- tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
44
+ fold_xx_to_x(ctx, op)) {
59
- | (ofs << 7) | ((len - 1) << 16));
45
+ return true;
60
+ /* According to gcc, AND can be faster. */
61
+ if (ofs == 0 && len <= 8) {
62
+ tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn,
63
+ encode_imm_nofail((1 << len) - 1));
64
+ return;
46
+ }
65
+ }
47
+ return false;
66
+
67
+ if (use_armv7_instructions) {
68
+ /* ubfx */
69
+ tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
70
+ | (ofs << 7) | ((len - 1) << 16));
71
+ return;
72
+ }
73
+
74
+ assert(ofs % 8 == 0);
75
+ switch (len) {
76
+ case 8:
77
+ /* uxtb */
78
+ tcg_out32(s, 0x06ef0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
79
+ break;
80
+ case 16:
81
+ /* uxth */
82
+ tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
83
+ break;
84
+ default:
85
+ g_assert_not_reached();
86
+ }
48
}
87
}
49
88
50
static bool fold_andc(OptContext *ctx, TCGOp *op)
89
static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd,
51
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
90
TCGReg rn, int ofs, int len)
52
53
static bool fold_or(OptContext *ctx, TCGOp *op)
54
{
91
{
55
- return fold_const2(ctx, op);
92
- /* sbfx */
56
+ if (fold_const2(ctx, op) ||
93
- tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
57
+ fold_xx_to_x(ctx, op)) {
94
- | (ofs << 7) | ((len - 1) << 16));
58
+ return true;
95
+ if (use_armv7_instructions) {
96
+ /* sbfx */
97
+ tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
98
+ | (ofs << 7) | ((len - 1) << 16));
99
+ return;
59
+ }
100
+ }
60
+ return false;
101
+
102
+ assert(ofs % 8 == 0);
103
+ switch (len) {
104
+ case 8:
105
+ /* sxtb */
106
+ tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
107
+ break;
108
+ case 16:
109
+ /* sxth */
110
+ tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
111
+ break;
112
+ default:
113
+ g_assert_not_reached();
114
+ }
61
}
115
}
62
116
63
static bool fold_orc(OptContext *ctx, TCGOp *op)
117
+
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
118
static void tcg_out_ld32u(TCGContext *s, ARMCond cond,
65
break;
119
TCGReg rd, TCGReg rn, int32_t offset)
66
}
120
{
67
68
- /* Simplify expression for "op r, a, a => mov r, a" cases */
69
- switch (opc) {
70
- CASE_OP_32_64_VEC(or):
71
- CASE_OP_32_64_VEC(and):
72
- if (args_are_copies(op->args[1], op->args[2])) {
73
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
74
- continue;
75
- }
76
- break;
77
- default:
78
- break;
79
- }
80
-
81
/*
82
* Process each opcode.
83
* Sorted alphabetically by opcode as much as possible.
84
--
121
--
85
2.25.1
122
2.43.0
86
123
87
124
diff view generated by jsdifflib
1
Reduce some code duplication by folding the NE and EQ cases.
1
Accept byte and word extensions with the extract opcodes.
2
This is preparatory to removing the specialized extracts.
2
3
3
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
tcg/optimize.c | 159 +++++++++++++++++++++++++------------------------
7
tcg/loongarch64/tcg-target-has.h | 15 ++++++++++++--
7
1 file changed, 81 insertions(+), 78 deletions(-)
8
tcg/loongarch64/tcg-target.c.inc | 34 ++++++++++++++++++++++++++++++--
9
2 files changed, 45 insertions(+), 4 deletions(-)
8
10
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h
10
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
13
--- a/tcg/loongarch64/tcg-target-has.h
12
+++ b/tcg/optimize.c
14
+++ b/tcg/loongarch64/tcg-target-has.h
13
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@
14
return fold_const2(ctx, op);
16
#define TCG_TARGET_HAS_rot_i32 1
15
}
17
#define TCG_TARGET_HAS_deposit_i32 1
16
18
#define TCG_TARGET_HAS_extract_i32 1
17
+static bool fold_brcond2(OptContext *ctx, TCGOp *op)
19
-#define TCG_TARGET_HAS_sextract_i32 0
20
+#define TCG_TARGET_HAS_sextract_i32 1
21
#define TCG_TARGET_HAS_extract2_i32 0
22
#define TCG_TARGET_HAS_add2_i32 0
23
#define TCG_TARGET_HAS_sub2_i32 0
24
@@ -XXX,XX +XXX,XX @@
25
#define TCG_TARGET_HAS_rot_i64 1
26
#define TCG_TARGET_HAS_deposit_i64 1
27
#define TCG_TARGET_HAS_extract_i64 1
28
-#define TCG_TARGET_HAS_sextract_i64 0
29
+#define TCG_TARGET_HAS_sextract_i64 1
30
#define TCG_TARGET_HAS_extract2_i64 0
31
#define TCG_TARGET_HAS_extr_i64_i32 1
32
#define TCG_TARGET_HAS_ext8s_i64 1
33
@@ -XXX,XX +XXX,XX @@
34
#define TCG_TARGET_HAS_cmpsel_vec 0
35
#define TCG_TARGET_HAS_tst_vec 0
36
37
+#define TCG_TARGET_extract_valid(type, ofs, len) 1
38
+
39
+static inline bool
40
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
18
+{
41
+{
19
+ TCGCond cond = op->args[4];
42
+ if (type == TCG_TYPE_I64 && ofs + len == 32) {
20
+ int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
43
+ return true;
21
+ TCGArg label = op->args[5];
22
+ int inv = 0;
23
+
24
+ if (i >= 0) {
25
+ goto do_brcond_const;
26
+ }
44
+ }
27
+
45
+ return ofs == 0 && (len == 8 || len == 16);
28
+ switch (cond) {
46
+}
29
+ case TCG_COND_LT:
47
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
30
+ case TCG_COND_GE:
48
31
+ /*
49
#endif
32
+ * Simplify LT/GE comparisons vs zero to a single compare
50
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
33
+ * vs the high word of the input.
51
index XXXXXXX..XXXXXXX 100644
34
+ */
52
--- a/tcg/loongarch64/tcg-target.c.inc
35
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
53
+++ b/tcg/loongarch64/tcg-target.c.inc
36
+ arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
54
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
37
+ goto do_brcond_high;
55
break;
56
57
case INDEX_op_extract_i32:
58
- tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1);
59
+ if (a2 == 0 && args[3] <= 12) {
60
+ tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1);
61
+ } else {
62
+ tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1);
63
+ }
64
break;
65
case INDEX_op_extract_i64:
66
- tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1);
67
+ if (a2 == 0 && args[3] <= 12) {
68
+ tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1);
69
+ } else {
70
+ tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1);
38
+ }
71
+ }
39
+ break;
72
+ break;
40
+
73
+
41
+ case TCG_COND_NE:
74
+ case INDEX_op_sextract_i64:
42
+ inv = 1;
75
+ if (a2 + args[3] == 32) {
43
+ QEMU_FALLTHROUGH;
76
+ if (a2 == 0) {
44
+ case TCG_COND_EQ:
77
+ tcg_out_ext32s(s, a0, a1);
45
+ /*
78
+ } else {
46
+ * Simplify EQ/NE comparisons where one of the pairs
79
+ tcg_out_opc_srai_w(s, a0, a1, a2);
47
+ * can be simplified.
80
+ }
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
50
+ op->args[2], cond);
51
+ switch (i ^ inv) {
52
+ case 0:
53
+ goto do_brcond_const;
54
+ case 1:
55
+ goto do_brcond_high;
56
+ }
57
+
58
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
59
+ op->args[3], cond);
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_brcond_const;
63
+ case 1:
64
+ op->opc = INDEX_op_brcond_i32;
65
+ op->args[1] = op->args[2];
66
+ op->args[2] = cond;
67
+ op->args[3] = label;
68
+ break;
81
+ break;
69
+ }
82
+ }
70
+ break;
83
+ /* FALLTHRU */
71
+
84
+ case INDEX_op_sextract_i32:
72
+ default:
85
+ if (a2 == 0 && args[3] == 8) {
73
+ break;
86
+ tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1);
74
+
87
+ } else if (a2 == 0 && args[3] == 16) {
75
+ do_brcond_high:
88
+ tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1);
76
+ op->opc = INDEX_op_brcond_i32;
89
+ } else {
77
+ op->args[0] = op->args[1];
90
+ g_assert_not_reached();
78
+ op->args[1] = op->args[3];
79
+ op->args[2] = cond;
80
+ op->args[3] = label;
81
+ break;
82
+
83
+ do_brcond_const:
84
+ if (i == 0) {
85
+ tcg_op_remove(ctx->tcg, op);
86
+ return true;
87
+ }
91
+ }
88
+ op->opc = INDEX_op_br;
92
break;
89
+ op->args[0] = label;
93
90
+ break;
94
case INDEX_op_deposit_i32:
91
+ }
95
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
92
+ return false;
96
case INDEX_op_not_i64:
93
+}
97
case INDEX_op_extract_i32:
94
+
98
case INDEX_op_extract_i64:
95
static bool fold_call(OptContext *ctx, TCGOp *op)
99
+ case INDEX_op_sextract_i32:
96
{
100
+ case INDEX_op_sextract_i64:
97
TCGContext *s = ctx->tcg;
101
case INDEX_op_bswap16_i32:
98
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
102
case INDEX_op_bswap16_i64:
99
}
103
case INDEX_op_bswap32_i32:
100
break;
101
102
- case INDEX_op_brcond2_i32:
103
- i = do_constant_folding_cond2(&op->args[0], &op->args[2],
104
- op->args[4]);
105
- if (i == 0) {
106
- do_brcond_false:
107
- tcg_op_remove(s, op);
108
- continue;
109
- }
110
- if (i > 0) {
111
- do_brcond_true:
112
- op->opc = opc = INDEX_op_br;
113
- op->args[0] = op->args[5];
114
- break;
115
- }
116
- if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
117
- && arg_is_const(op->args[2])
118
- && arg_info(op->args[2])->val == 0
119
- && arg_is_const(op->args[3])
120
- && arg_info(op->args[3])->val == 0) {
121
- /* Simplify LT/GE comparisons vs zero to a single compare
122
- vs the high word of the input. */
123
- do_brcond_high:
124
- op->opc = opc = INDEX_op_brcond_i32;
125
- op->args[0] = op->args[1];
126
- op->args[1] = op->args[3];
127
- op->args[2] = op->args[4];
128
- op->args[3] = op->args[5];
129
- break;
130
- }
131
- if (op->args[4] == TCG_COND_EQ) {
132
- /* Simplify EQ comparisons where one of the pairs
133
- can be simplified. */
134
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
135
- op->args[0], op->args[2],
136
- TCG_COND_EQ);
137
- if (i == 0) {
138
- goto do_brcond_false;
139
- } else if (i > 0) {
140
- goto do_brcond_high;
141
- }
142
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
143
- op->args[1], op->args[3],
144
- TCG_COND_EQ);
145
- if (i == 0) {
146
- goto do_brcond_false;
147
- } else if (i < 0) {
148
- break;
149
- }
150
- do_brcond_low:
151
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
152
- op->opc = INDEX_op_brcond_i32;
153
- op->args[1] = op->args[2];
154
- op->args[2] = op->args[4];
155
- op->args[3] = op->args[5];
156
- break;
157
- }
158
- if (op->args[4] == TCG_COND_NE) {
159
- /* Simplify NE comparisons where one of the pairs
160
- can be simplified. */
161
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
162
- op->args[0], op->args[2],
163
- TCG_COND_NE);
164
- if (i == 0) {
165
- goto do_brcond_high;
166
- } else if (i > 0) {
167
- goto do_brcond_true;
168
- }
169
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
170
- op->args[1], op->args[3],
171
- TCG_COND_NE);
172
- if (i == 0) {
173
- goto do_brcond_low;
174
- } else if (i > 0) {
175
- goto do_brcond_true;
176
- }
177
- }
178
- break;
179
-
180
default:
181
break;
182
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
CASE_OP_32_64_VEC(andc):
185
done = fold_andc(&ctx, op);
186
break;
187
+ case INDEX_op_brcond2_i32:
188
+ done = fold_brcond2(&ctx, op);
189
+ break;
190
CASE_OP_32_64(ctpop):
191
done = fold_ctpop(&ctx, op);
192
break;
193
--
104
--
194
2.25.1
105
2.43.0
195
106
196
107
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Accept AND, ext32u, ext32s extensions with the extract opcodes.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
This is preparatory to removing the specialized extracts.
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
tcg/optimize.c | 56 ++++++++++++++++++++++++++++----------------------
7
tcg/mips/tcg-target-has.h | 26 ++++++++++++++++++++++----
6
1 file changed, 31 insertions(+), 25 deletions(-)
8
tcg/mips/tcg-target.c.inc | 33 ++++++++++++++++++++++++++++++---
9
2 files changed, 52 insertions(+), 7 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/tcg/mips/tcg-target-has.h
11
+++ b/tcg/optimize.c
14
+++ b/tcg/mips/tcg-target-has.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
13
return true;
16
14
}
17
/* optional instructions detected at runtime */
15
18
#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
16
+static bool fold_movcond(OptContext *ctx, TCGOp *op)
19
-#define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions
20
-#define TCG_TARGET_HAS_sextract_i32 0
21
+#define TCG_TARGET_HAS_extract_i32 1
22
+#define TCG_TARGET_HAS_sextract_i32 1
23
#define TCG_TARGET_HAS_extract2_i32 0
24
#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
25
#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
26
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
27
#define TCG_TARGET_HAS_bswap32_i64 1
28
#define TCG_TARGET_HAS_bswap64_i64 1
29
#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
30
-#define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions
31
-#define TCG_TARGET_HAS_sextract_i64 0
32
+#define TCG_TARGET_HAS_extract_i64 1
33
+#define TCG_TARGET_HAS_sextract_i64 1
34
#define TCG_TARGET_HAS_extract2_i64 0
35
#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
36
#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
37
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
38
#define TCG_TARGET_HAS_qemu_ldst_i128 0
39
#define TCG_TARGET_HAS_tst 0
40
41
+#define TCG_TARGET_extract_valid(type, ofs, len) use_mips32r2_instructions
42
+
43
+static inline bool
44
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
17
+{
45
+{
18
+ TCGOpcode opc = op->opc;
46
+ if (ofs == 0) {
19
+ TCGCond cond = op->args[5];
47
+ switch (len) {
20
+ int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
48
+ case 8:
21
+
49
+ case 16:
22
+ if (i >= 0) {
50
+ return use_mips32r2_instructions;
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
51
+ case 32:
24
+ }
52
+ return type == TCG_TYPE_I64;
25
+
26
+ if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
27
+ uint64_t tv = arg_info(op->args[3])->val;
28
+ uint64_t fv = arg_info(op->args[4])->val;
29
+
30
+ opc = (opc == INDEX_op_movcond_i32
31
+ ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
32
+
33
+ if (tv == 1 && fv == 0) {
34
+ op->opc = opc;
35
+ op->args[3] = cond;
36
+ } else if (fv == 1 && tv == 0) {
37
+ op->opc = opc;
38
+ op->args[3] = tcg_invert_cond(cond);
39
+ }
53
+ }
40
+ }
54
+ }
41
+ return false;
55
+ return false;
42
+}
56
+}
57
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
43
+
58
+
44
static bool fold_mul(OptContext *ctx, TCGOp *op)
59
#endif
45
{
60
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
46
return fold_const2(ctx, op);
61
index XXXXXXX..XXXXXXX 100644
47
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
--- a/tcg/mips/tcg-target.c.inc
48
}
63
+++ b/tcg/mips/tcg-target.c.inc
49
break;
64
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
50
65
tcg_out_opc_bf64(s, OPC_DINS, OPC_DINSM, OPC_DINSU, a0, a2,
51
- CASE_OP_32_64(movcond):
66
args[3] + args[4] - 1, args[3]);
52
- i = do_constant_folding_cond(opc, op->args[1],
67
break;
53
- op->args[2], op->args[5]);
68
+
54
- if (i >= 0) {
69
case INDEX_op_extract_i32:
55
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
70
- tcg_out_opc_bf(s, OPC_EXT, a0, a1, args[3] - 1, a2);
56
- continue;
71
+ if (a2 == 0 && args[3] <= 16) {
57
- }
72
+ tcg_out_opc_imm(s, OPC_ANDI, a0, a1, (1 << args[3]) - 1);
58
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
73
+ } else {
59
- uint64_t tv = arg_info(op->args[3])->val;
74
+ tcg_out_opc_bf(s, OPC_EXT, a0, a1, args[3] - 1, a2);
60
- uint64_t fv = arg_info(op->args[4])->val;
75
+ }
61
- TCGCond cond = op->args[5];
76
break;
62
-
77
case INDEX_op_extract_i64:
63
- if (fv == 1 && tv == 0) {
78
- tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1,
64
- cond = tcg_invert_cond(cond);
79
- args[3] - 1, a2);
65
- } else if (!(tv == 1 && fv == 0)) {
80
+ if (a2 == 0 && args[3] <= 16) {
66
- break;
81
+ tcg_out_opc_imm(s, OPC_ANDI, a0, a1, (1 << args[3]) - 1);
67
- }
82
+ } else {
68
- op->args[3] = cond;
83
+ tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU,
69
- op->opc = opc = (opc == INDEX_op_movcond_i32
84
+ a0, a1, args[3] - 1, a2);
70
- ? INDEX_op_setcond_i32
85
+ }
71
- : INDEX_op_setcond_i64);
86
+ break;
72
- }
87
+
73
- break;
88
+ case INDEX_op_sextract_i64:
74
-
89
+ if (a2 == 0 && args[3] == 32) {
75
-
90
+ tcg_out_ext32s(s, a0, a1);
76
default:
77
break;
78
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
case INDEX_op_mb:
81
done = fold_mb(&ctx, op);
82
break;
83
+ CASE_OP_32_64(movcond):
84
+ done = fold_movcond(&ctx, op);
85
+ break;
91
+ break;
86
CASE_OP_32_64(mul):
92
+ }
87
done = fold_mul(&ctx, op);
93
+ /* FALLTHRU */
88
break;
94
+ case INDEX_op_sextract_i32:
95
+ if (a2 == 0 && args[3] == 8) {
96
+ tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1);
97
+ } else if (a2 == 0 && args[3] == 16) {
98
+ tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1);
99
+ } else {
100
+ g_assert_not_reached();
101
+ }
102
break;
103
104
case INDEX_op_brcond_i32:
105
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
106
case INDEX_op_ext8s_i32:
107
case INDEX_op_ext16s_i32:
108
case INDEX_op_extract_i32:
109
+ case INDEX_op_sextract_i32:
110
case INDEX_op_ld8u_i64:
111
case INDEX_op_ld8s_i64:
112
case INDEX_op_ld16u_i64:
113
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
114
case INDEX_op_extrl_i64_i32:
115
case INDEX_op_extrh_i64_i32:
116
case INDEX_op_extract_i64:
117
+ case INDEX_op_sextract_i64:
118
return C_O1_I1(r, r);
119
120
case INDEX_op_st8_i32:
89
--
121
--
90
2.25.1
122
2.43.0
91
123
92
124
diff view generated by jsdifflib
1
Reduce some code duplication by folding the NE and EQ cases.
1
Accept byte and word extensions with the extract opcodes.
2
This is preparatory to removing the specialized extracts.
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/optimize.c | 145 ++++++++++++++++++++++++-------------------------
7
tcg/ppc/tcg-target-has.h | 16 ++++++++++++++--
8
1 file changed, 72 insertions(+), 73 deletions(-)
8
tcg/ppc/tcg-target.c.inc | 30 ++++++++++++++++++++++++++++++
9
2 files changed, 44 insertions(+), 2 deletions(-)
9
10
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
--- a/tcg/ppc/tcg-target-has.h
13
+++ b/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target-has.h
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@
15
return fold_const2(ctx, op);
16
#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
16
}
17
#define TCG_TARGET_HAS_deposit_i32 1
17
18
#define TCG_TARGET_HAS_extract_i32 1
18
+static bool fold_setcond2(OptContext *ctx, TCGOp *op)
19
-#define TCG_TARGET_HAS_sextract_i32 0
20
+#define TCG_TARGET_HAS_sextract_i32 1
21
#define TCG_TARGET_HAS_extract2_i32 0
22
#define TCG_TARGET_HAS_negsetcond_i32 1
23
#define TCG_TARGET_HAS_mulu2_i32 0
24
@@ -XXX,XX +XXX,XX @@
25
#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
26
#define TCG_TARGET_HAS_deposit_i64 1
27
#define TCG_TARGET_HAS_extract_i64 1
28
-#define TCG_TARGET_HAS_sextract_i64 0
29
+#define TCG_TARGET_HAS_sextract_i64 1
30
#define TCG_TARGET_HAS_extract2_i64 0
31
#define TCG_TARGET_HAS_negsetcond_i64 1
32
#define TCG_TARGET_HAS_add2_i64 1
33
@@ -XXX,XX +XXX,XX @@
34
#define TCG_TARGET_HAS_cmpsel_vec 1
35
#define TCG_TARGET_HAS_tst_vec 0
36
37
+#define TCG_TARGET_extract_valid(type, ofs, len) 1
38
+
39
+static inline bool
40
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
19
+{
41
+{
20
+ TCGCond cond = op->args[5];
42
+ if (type == TCG_TYPE_I64 && ofs + len == 32) {
21
+ int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
43
+ return true;
22
+ int inv = 0;
44
+ }
45
+ return ofs == 0 && (len == 8 || len == 16);
46
+}
47
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
23
+
48
+
24
+ if (i >= 0) {
49
#endif
25
+ goto do_setcond_const;
50
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
26
+ }
51
index XXXXXXX..XXXXXXX 100644
27
+
52
--- a/tcg/ppc/tcg-target.c.inc
28
+ switch (cond) {
53
+++ b/tcg/ppc/tcg-target.c.inc
29
+ case TCG_COND_LT:
54
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
30
+ case TCG_COND_GE:
55
break;
31
+ /*
56
32
+ * Simplify LT/GE comparisons vs zero to a single compare
57
case INDEX_op_extract_i32:
33
+ * vs the high word of the input.
58
+ if (args[2] == 0 && args[3] <= 16) {
34
+ */
59
+ tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1));
35
+ if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
60
+ break;
36
+ arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
61
+ }
37
+ goto do_setcond_high;
62
tcg_out_rlw(s, RLWINM, args[0], args[1],
63
32 - args[2], 32 - args[3], 31);
64
break;
65
case INDEX_op_extract_i64:
66
+ if (args[2] == 0 && args[3] <= 16) {
67
+ tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1));
68
+ break;
69
+ }
70
tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
71
break;
72
73
+ case INDEX_op_sextract_i64:
74
+ if (args[2] + args[3] == 32) {
75
+ if (args[2] == 0) {
76
+ tcg_out_ext32s(s, args[0], args[1]);
77
+ } else {
78
+ tcg_out_sari32(s, args[0], args[1], args[2]);
79
+ }
80
+ break;
81
+ }
82
+ /* FALLTHRU */
83
+ case INDEX_op_sextract_i32:
84
+ if (args[2] == 0 && args[3] == 8) {
85
+ tcg_out_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
86
+ } else if (args[2] == 0 && args[3] == 16) {
87
+ tcg_out_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
88
+ } else {
89
+ g_assert_not_reached();
38
+ }
90
+ }
39
+ break;
91
+ break;
40
+
92
+
41
+ case TCG_COND_NE:
93
case INDEX_op_movcond_i32:
42
+ inv = 1;
94
tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
43
+ QEMU_FALLTHROUGH;
95
args[3], args[4], const_args[2]);
44
+ case TCG_COND_EQ:
96
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
45
+ /*
97
case INDEX_op_bswap16_i32:
46
+ * Simplify EQ/NE comparisons where one of the pairs
98
case INDEX_op_bswap32_i32:
47
+ * can be simplified.
99
case INDEX_op_extract_i32:
48
+ */
100
+ case INDEX_op_sextract_i32:
49
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
101
case INDEX_op_ld8u_i64:
50
+ op->args[3], cond);
102
case INDEX_op_ld8s_i64:
51
+ switch (i ^ inv) {
103
case INDEX_op_ld16u_i64:
52
+ case 0:
104
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
53
+ goto do_setcond_const;
105
case INDEX_op_bswap32_i64:
54
+ case 1:
106
case INDEX_op_bswap64_i64:
55
+ goto do_setcond_high;
107
case INDEX_op_extract_i64:
56
+ }
108
+ case INDEX_op_sextract_i64:
57
+
109
return C_O1_I1(r, r);
58
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
110
59
+ op->args[4], cond);
111
case INDEX_op_st8_i32:
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_setcond_const;
63
+ case 1:
64
+ op->args[2] = op->args[3];
65
+ op->args[3] = cond;
66
+ op->opc = INDEX_op_setcond_i32;
67
+ break;
68
+ }
69
+ break;
70
+
71
+ default:
72
+ break;
73
+
74
+ do_setcond_high:
75
+ op->args[1] = op->args[2];
76
+ op->args[2] = op->args[4];
77
+ op->args[3] = cond;
78
+ op->opc = INDEX_op_setcond_i32;
79
+ break;
80
+ }
81
+ return false;
82
+
83
+ do_setcond_const:
84
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
85
+}
86
+
87
static bool fold_shift(OptContext *ctx, TCGOp *op)
88
{
89
return fold_const2(ctx, op);
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
}
92
break;
93
94
- case INDEX_op_setcond2_i32:
95
- i = do_constant_folding_cond2(&op->args[1], &op->args[3],
96
- op->args[5]);
97
- if (i >= 0) {
98
- do_setcond_const:
99
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
100
- continue;
101
- }
102
- if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
103
- && arg_is_const(op->args[3])
104
- && arg_info(op->args[3])->val == 0
105
- && arg_is_const(op->args[4])
106
- && arg_info(op->args[4])->val == 0) {
107
- /* Simplify LT/GE comparisons vs zero to a single compare
108
- vs the high word of the input. */
109
- do_setcond_high:
110
- reset_temp(op->args[0]);
111
- arg_info(op->args[0])->z_mask = 1;
112
- op->opc = INDEX_op_setcond_i32;
113
- op->args[1] = op->args[2];
114
- op->args[2] = op->args[4];
115
- op->args[3] = op->args[5];
116
- break;
117
- }
118
- if (op->args[5] == TCG_COND_EQ) {
119
- /* Simplify EQ comparisons where one of the pairs
120
- can be simplified. */
121
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
122
- op->args[1], op->args[3],
123
- TCG_COND_EQ);
124
- if (i == 0) {
125
- goto do_setcond_const;
126
- } else if (i > 0) {
127
- goto do_setcond_high;
128
- }
129
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
130
- op->args[2], op->args[4],
131
- TCG_COND_EQ);
132
- if (i == 0) {
133
- goto do_setcond_high;
134
- } else if (i < 0) {
135
- break;
136
- }
137
- do_setcond_low:
138
- reset_temp(op->args[0]);
139
- arg_info(op->args[0])->z_mask = 1;
140
- op->opc = INDEX_op_setcond_i32;
141
- op->args[2] = op->args[3];
142
- op->args[3] = op->args[5];
143
- break;
144
- }
145
- if (op->args[5] == TCG_COND_NE) {
146
- /* Simplify NE comparisons where one of the pairs
147
- can be simplified. */
148
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
149
- op->args[1], op->args[3],
150
- TCG_COND_NE);
151
- if (i == 0) {
152
- goto do_setcond_high;
153
- } else if (i > 0) {
154
- goto do_setcond_const;
155
- }
156
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
157
- op->args[2], op->args[4],
158
- TCG_COND_NE);
159
- if (i == 0) {
160
- goto do_setcond_low;
161
- } else if (i > 0) {
162
- goto do_setcond_const;
163
- }
164
- }
165
- break;
166
-
167
default:
168
break;
169
170
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
171
CASE_OP_32_64(shr):
172
done = fold_shift(&ctx, op);
173
break;
174
+ case INDEX_op_setcond2_i32:
175
+ done = fold_setcond2(&ctx, op);
176
+ break;
177
CASE_OP_32_64_VEC(sub):
178
done = fold_sub(&ctx, op);
179
break;
180
--
112
--
181
2.25.1
113
2.43.0
182
114
183
115
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Accept byte and word extensions with the extract opcodes.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
This is preparatory to removing the specialized extracts.
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
tcg/optimize.c | 53 +++++++++++++++++++++++++++++---------------------
7
tcg/riscv/tcg-target-has.h | 39 ++++++++++++++++++++++++++++++++++----
6
1 file changed, 31 insertions(+), 22 deletions(-)
8
tcg/riscv/tcg-target.c.inc | 34 +++++++++++++++++++++++++++++++++
9
2 files changed, 69 insertions(+), 4 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/tcg/riscv/tcg-target-has.h
11
+++ b/tcg/optimize.c
14
+++ b/tcg/riscv/tcg-target-has.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@
13
return fold_const2(ctx, op);
16
#define TCG_TARGET_HAS_div2_i32 0
14
}
17
#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB)
15
18
#define TCG_TARGET_HAS_deposit_i32 0
16
+static bool fold_dup(OptContext *ctx, TCGOp *op)
19
-#define TCG_TARGET_HAS_extract_i32 0
20
-#define TCG_TARGET_HAS_sextract_i32 0
21
+#define TCG_TARGET_HAS_extract_i32 1
22
+#define TCG_TARGET_HAS_sextract_i32 1
23
#define TCG_TARGET_HAS_extract2_i32 0
24
#define TCG_TARGET_HAS_add2_i32 1
25
#define TCG_TARGET_HAS_sub2_i32 1
26
@@ -XXX,XX +XXX,XX @@
27
#define TCG_TARGET_HAS_div2_i64 0
28
#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB)
29
#define TCG_TARGET_HAS_deposit_i64 0
30
-#define TCG_TARGET_HAS_extract_i64 0
31
-#define TCG_TARGET_HAS_sextract_i64 0
32
+#define TCG_TARGET_HAS_extract_i64 1
33
+#define TCG_TARGET_HAS_sextract_i64 1
34
#define TCG_TARGET_HAS_extract2_i64 0
35
#define TCG_TARGET_HAS_extr_i64_i32 1
36
#define TCG_TARGET_HAS_ext8s_i64 1
37
@@ -XXX,XX +XXX,XX @@
38
39
#define TCG_TARGET_HAS_tst_vec 0
40
41
+static inline bool
42
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
17
+{
43
+{
18
+ if (arg_is_const(op->args[1])) {
44
+ if (ofs == 0) {
19
+ uint64_t t = arg_info(op->args[1])->val;
45
+ switch (len) {
20
+ t = dup_const(TCGOP_VECE(op), t);
46
+ case 16:
21
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
47
+ return cpuinfo & CPUINFO_ZBB;
48
+ case 32:
49
+ return (cpuinfo & CPUINFO_ZBA) && type == TCG_TYPE_I64;
50
+ }
22
+ }
51
+ }
23
+ return false;
52
+ return false;
24
+}
53
+}
54
+#define TCG_TARGET_extract_valid tcg_target_extract_valid
25
+
55
+
26
+static bool fold_dup2(OptContext *ctx, TCGOp *op)
56
+static inline bool
57
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
27
+{
58
+{
28
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
59
+ if (ofs == 0) {
29
+ uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
60
+ switch (len) {
30
+ arg_info(op->args[2])->val);
61
+ case 8:
31
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
62
+ case 16:
32
+ }
63
+ return cpuinfo & CPUINFO_ZBB;
33
+
64
+ case 32:
34
+ if (args_are_copies(op->args[1], op->args[2])) {
65
+ return type == TCG_TYPE_I64;
35
+ op->opc = INDEX_op_dup_vec;
66
+ }
36
+ TCGOP_VECE(op) = MO_32;
37
+ }
67
+ }
38
+ return false;
68
+ return false;
39
+}
69
+}
70
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
40
+
71
+
41
static bool fold_eqv(OptContext *ctx, TCGOp *op)
72
#endif
42
{
73
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
43
return fold_const2(ctx, op);
74
index XXXXXXX..XXXXXXX 100644
44
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
75
--- a/tcg/riscv/tcg-target.c.inc
45
done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
76
+++ b/tcg/riscv/tcg-target.c.inc
46
break;
77
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
47
78
tcg_out_mb(s, a0);
48
- case INDEX_op_dup_vec:
79
break;
49
- if (arg_is_const(op->args[1])) {
80
50
- tmp = arg_info(op->args[1])->val;
81
+ case INDEX_op_extract_i64:
51
- tmp = dup_const(TCGOP_VECE(op), tmp);
82
+ if (a2 == 0 && args[3] == 32) {
52
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
83
+ tcg_out_ext32u(s, a0, a1);
53
- continue;
54
- }
55
- break;
56
-
57
- case INDEX_op_dup2_vec:
58
- assert(TCG_TARGET_REG_BITS == 32);
59
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
60
- tcg_opt_gen_movi(&ctx, op, op->args[0],
61
- deposit64(arg_info(op->args[1])->val, 32, 32,
62
- arg_info(op->args[2])->val));
63
- continue;
64
- } else if (args_are_copies(op->args[1], op->args[2])) {
65
- op->opc = INDEX_op_dup_vec;
66
- TCGOP_VECE(op) = MO_32;
67
- }
68
- break;
69
-
70
default:
71
break;
72
73
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
74
CASE_OP_32_64(divu):
75
done = fold_divide(&ctx, op);
76
break;
77
+ case INDEX_op_dup_vec:
78
+ done = fold_dup(&ctx, op);
79
+ break;
84
+ break;
80
+ case INDEX_op_dup2_vec:
85
+ }
81
+ done = fold_dup2(&ctx, op);
86
+ /* FALLTHRU */
87
+ case INDEX_op_extract_i32:
88
+ if (a2 == 0 && args[3] == 16) {
89
+ tcg_out_ext16u(s, a0, a1);
90
+ } else {
91
+ g_assert_not_reached();
92
+ }
93
+ break;
94
+
95
+ case INDEX_op_sextract_i64:
96
+ if (a2 == 0 && args[3] == 32) {
97
+ tcg_out_ext32s(s, a0, a1);
82
+ break;
98
+ break;
83
CASE_OP_32_64(eqv):
99
+ }
84
done = fold_eqv(&ctx, op);
100
+ /* FALLTHRU */
85
break;
101
+ case INDEX_op_sextract_i32:
102
+ if (a2 == 0 && args[3] == 8) {
103
+ tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1);
104
+ } else if (a2 == 0 && args[3] == 16) {
105
+ tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1);
106
+ } else {
107
+ g_assert_not_reached();
108
+ }
109
+ break;
110
+
111
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
112
case INDEX_op_mov_i64:
113
case INDEX_op_call: /* Always emitted via tcg_out_call. */
114
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
115
case INDEX_op_extrl_i64_i32:
116
case INDEX_op_extrh_i64_i32:
117
case INDEX_op_ext_i32_i64:
118
+ case INDEX_op_extract_i32:
119
+ case INDEX_op_extract_i64:
120
+ case INDEX_op_sextract_i32:
121
+ case INDEX_op_sextract_i64:
122
case INDEX_op_bswap16_i32:
123
case INDEX_op_bswap32_i32:
124
case INDEX_op_bswap16_i64:
86
--
125
--
87
2.25.1
126
2.43.0
88
127
89
128
diff view generated by jsdifflib
1
This will allow callers to tail call to these functions
1
Extracts which abut bit 32 may use 32-bit shifts.
2
and return true indicating processing complete.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
tcg/optimize.c | 9 +++++----
6
tcg/riscv/tcg-target-has.h | 24 +++++++-----------------
10
1 file changed, 5 insertions(+), 4 deletions(-)
7
tcg/riscv/tcg-target.c.inc | 16 ++++++++++++----
8
2 files changed, 19 insertions(+), 21 deletions(-)
11
9
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
12
--- a/tcg/riscv/tcg-target-has.h
15
+++ b/tcg/optimize.c
13
+++ b/tcg/riscv/tcg-target-has.h
16
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
14
@@ -XXX,XX +XXX,XX @@
17
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
15
static inline bool
16
tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
17
{
18
- if (ofs == 0) {
19
- switch (len) {
20
- case 16:
21
- return cpuinfo & CPUINFO_ZBB;
22
- case 32:
23
- return (cpuinfo & CPUINFO_ZBA) && type == TCG_TYPE_I64;
24
- }
25
+ if (type == TCG_TYPE_I64 && ofs + len == 32) {
26
+ /* ofs > 0 uses SRLIW; ofs == 0 uses add.uw. */
27
+ return ofs || (cpuinfo & CPUINFO_ZBA);
28
}
29
- return false;
30
+ return (cpuinfo & CPUINFO_ZBB) && ofs == 0 && len == 16;
18
}
31
}
19
32
#define TCG_TARGET_extract_valid tcg_target_extract_valid
20
-static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
33
21
+static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
34
static inline bool
35
tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
22
{
36
{
23
TCGTemp *dst_ts = arg_temp(dst);
37
- if (ofs == 0) {
24
TCGTemp *src_ts = arg_temp(src);
38
- switch (len) {
25
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
39
- case 8:
26
40
- case 16:
27
if (ts_are_copies(dst_ts, src_ts)) {
41
- return cpuinfo & CPUINFO_ZBB;
28
tcg_op_remove(ctx->tcg, op);
42
- case 32:
29
- return;
43
- return type == TCG_TYPE_I64;
44
- }
45
+ if (type == TCG_TYPE_I64 && ofs + len == 32) {
30
+ return true;
46
+ return true;
31
}
47
}
32
48
- return false;
33
reset_ts(dst_ts);
49
+ return (cpuinfo & CPUINFO_ZBB) && ofs == 0 && (len == 8 || len == 16);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
35
di->is_const = si->is_const;
36
di->val = si->val;
37
}
38
+ return true;
39
}
50
}
40
51
#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
41
-static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
52
42
+static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
53
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
43
TCGArg dst, uint64_t val)
54
index XXXXXXX..XXXXXXX 100644
44
{
55
--- a/tcg/riscv/tcg-target.c.inc
45
const TCGOpDef *def = &tcg_op_defs[op->opc];
56
+++ b/tcg/riscv/tcg-target.c.inc
46
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
57
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
47
/* Convert movi to mov with constant temp. */
58
break;
48
tv = tcg_constant_internal(type, val);
59
49
init_ts_info(ctx, tv);
60
case INDEX_op_extract_i64:
50
- tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
61
- if (a2 == 0 && args[3] == 32) {
51
+ return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
62
- tcg_out_ext32u(s, a0, a1);
52
}
63
+ if (a2 + args[3] == 32) {
53
64
+ if (a2 == 0) {
54
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
65
+ tcg_out_ext32u(s, a0, a1);
66
+ } else {
67
+ tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2);
68
+ }
69
break;
70
}
71
/* FALLTHRU */
72
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
73
break;
74
75
case INDEX_op_sextract_i64:
76
- if (a2 == 0 && args[3] == 32) {
77
- tcg_out_ext32s(s, a0, a1);
78
+ if (a2 + args[3] == 32) {
79
+ if (a2 == 0) {
80
+ tcg_out_ext32s(s, a0, a1);
81
+ } else {
82
+ tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2);
83
+ }
84
break;
85
}
86
/* FALLTHRU */
55
--
87
--
56
2.25.1
88
2.43.0
57
89
58
90
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Accept byte and word extensions with the extract opcodes.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
This is preparatory to removing the specialized extracts.
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
tcg/optimize.c | 48 ++++++++++++++++++++++++++++++------------------
7
tcg/s390x/tcg-target-has.h | 22 ++++++++++++++++++++--
6
1 file changed, 30 insertions(+), 18 deletions(-)
8
tcg/s390x/tcg-target.c.inc | 37 +++++++++++++++++++++++++++++++++++++
9
2 files changed, 57 insertions(+), 2 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/tcg/s390x/tcg-target-has.h
11
+++ b/tcg/optimize.c
14
+++ b/tcg/s390x/tcg-target-has.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
13
return fold_const2(ctx, op);
16
#define TCG_TARGET_HAS_ctpop_i32 1
14
}
17
#define TCG_TARGET_HAS_deposit_i32 1
15
18
#define TCG_TARGET_HAS_extract_i32 1
16
+static bool fold_extract(OptContext *ctx, TCGOp *op)
19
-#define TCG_TARGET_HAS_sextract_i32 0
20
+#define TCG_TARGET_HAS_sextract_i32 1
21
#define TCG_TARGET_HAS_extract2_i32 0
22
#define TCG_TARGET_HAS_negsetcond_i32 1
23
#define TCG_TARGET_HAS_add2_i32 1
24
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
25
#define TCG_TARGET_HAS_ctpop_i64 1
26
#define TCG_TARGET_HAS_deposit_i64 1
27
#define TCG_TARGET_HAS_extract_i64 1
28
-#define TCG_TARGET_HAS_sextract_i64 0
29
+#define TCG_TARGET_HAS_sextract_i64 1
30
#define TCG_TARGET_HAS_extract2_i64 0
31
#define TCG_TARGET_HAS_negsetcond_i64 1
32
#define TCG_TARGET_HAS_add2_i64 1
33
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
34
#define TCG_TARGET_HAS_cmpsel_vec 1
35
#define TCG_TARGET_HAS_tst_vec 0
36
37
+#define TCG_TARGET_extract_valid(type, ofs, len) 1
38
+
39
+static inline bool
40
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
17
+{
41
+{
18
+ if (arg_is_const(op->args[1])) {
42
+ if (ofs == 0) {
19
+ uint64_t t;
43
+ switch (len) {
20
+
44
+ case 8:
21
+ t = arg_info(op->args[1])->val;
45
+ case 16:
22
+ t = extract64(t, op->args[2], op->args[3]);
46
+ return true;
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
47
+ case 32:
48
+ return type == TCG_TYPE_I64;
49
+ }
24
+ }
50
+ }
25
+ return false;
51
+ return false;
26
+}
52
+}
53
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
27
+
54
+
28
static bool fold_extract2(OptContext *ctx, TCGOp *op)
55
#endif
56
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
57
index XXXXXXX..XXXXXXX 100644
58
--- a/tcg/s390x/tcg-target.c.inc
59
+++ b/tcg/s390x/tcg-target.c.inc
60
@@ -XXX,XX +XXX,XX @@ static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
61
static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
62
int ofs, int len)
29
{
63
{
30
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
64
+ if (ofs == 0) {
31
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
65
+ switch (len) {
32
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
66
+ case 8:
67
+ tcg_out_ext8u(s, dest, src);
68
+ return;
69
+ case 16:
70
+ tcg_out_ext16u(s, dest, src);
71
+ return;
72
+ case 32:
73
+ tcg_out_ext32u(s, dest, src);
74
+ return;
75
+ }
76
+ }
77
tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
33
}
78
}
34
79
35
+static bool fold_sextract(OptContext *ctx, TCGOp *op)
80
+static void tgen_sextract(TCGContext *s, TCGReg dest, TCGReg src,
81
+ int ofs, int len)
36
+{
82
+{
37
+ if (arg_is_const(op->args[1])) {
83
+ if (ofs == 0) {
38
+ uint64_t t;
84
+ switch (len) {
39
+
85
+ case 8:
40
+ t = arg_info(op->args[1])->val;
86
+ tcg_out_ext8s(s, TCG_TYPE_REG, dest, src);
41
+ t = sextract64(t, op->args[2], op->args[3]);
87
+ return;
42
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
88
+ case 16:
89
+ tcg_out_ext16s(s, TCG_TYPE_REG, dest, src);
90
+ return;
91
+ case 32:
92
+ tcg_out_ext32s(s, dest, src);
93
+ return;
94
+ }
43
+ }
95
+ }
44
+ return false;
96
+ g_assert_not_reached();
45
+}
97
+}
46
+
98
+
47
static bool fold_shift(OptContext *ctx, TCGOp *op)
99
static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
48
{
100
{
49
return fold_const2(ctx, op);
101
ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
50
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
102
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
51
}
103
OP_32_64(extract):
52
break;
104
tgen_extract(s, args[0], args[1], args[2], args[3]);
53
105
break;
54
- CASE_OP_32_64(extract):
106
+ OP_32_64(sextract):
55
- if (arg_is_const(op->args[1])) {
107
+ tgen_sextract(s, args[0], args[1], args[2], args[3]);
56
- tmp = extract64(arg_info(op->args[1])->val,
108
+ break;
57
- op->args[2], op->args[3]);
109
58
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
110
case INDEX_op_clz_i64:
59
- continue;
111
tgen_clz(s, args[0], args[1], args[2], const_args[2]);
60
- }
112
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
61
- break;
113
case INDEX_op_extu_i32_i64:
62
-
114
case INDEX_op_extract_i32:
63
- CASE_OP_32_64(sextract):
115
case INDEX_op_extract_i64:
64
- if (arg_is_const(op->args[1])) {
116
+ case INDEX_op_sextract_i32:
65
- tmp = sextract64(arg_info(op->args[1])->val,
117
+ case INDEX_op_sextract_i64:
66
- op->args[2], op->args[3]);
118
case INDEX_op_ctpop_i32:
67
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
119
case INDEX_op_ctpop_i64:
68
- continue;
120
return C_O1_I1(r, r);
69
- }
70
- break;
71
-
72
default:
73
break;
74
75
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
76
CASE_OP_32_64(eqv):
77
done = fold_eqv(&ctx, op);
78
break;
79
+ CASE_OP_32_64(extract):
80
+ done = fold_extract(&ctx, op);
81
+ break;
82
CASE_OP_32_64(extract2):
83
done = fold_extract2(&ctx, op);
84
break;
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
case INDEX_op_setcond2_i32:
87
done = fold_setcond2(&ctx, op);
88
break;
89
+ CASE_OP_32_64(sextract):
90
+ done = fold_sextract(&ctx, op);
91
+ break;
92
CASE_OP_32_64_VEC(sub):
93
done = fold_sub(&ctx, op);
94
break;
95
--
121
--
96
2.25.1
122
2.43.0
97
123
98
124
diff view generated by jsdifflib
1
Continue splitting tcg_optimize.
1
Extracts which abut bit 32 may use 32-bit shifts.
2
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/optimize.c | 22 ++++++++++++++--------
6
tcg/sparc64/tcg-target-has.h | 13 +++++++++----
9
1 file changed, 14 insertions(+), 8 deletions(-)
7
tcg/sparc64/tcg-target.c.inc | 11 +++++++++++
8
2 files changed, 20 insertions(+), 4 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/tcg/sparc64/tcg-target-has.h
14
+++ b/tcg/optimize.c
13
+++ b/tcg/sparc64/tcg-target-has.h
15
@@ -XXX,XX +XXX,XX @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
14
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
16
}
15
#define TCG_TARGET_HAS_ctz_i32 0
17
}
16
#define TCG_TARGET_HAS_ctpop_i32 0
18
17
#define TCG_TARGET_HAS_deposit_i32 0
19
+static void copy_propagate(OptContext *ctx, TCGOp *op,
18
-#define TCG_TARGET_HAS_extract_i32 0
20
+ int nb_oargs, int nb_iargs)
19
-#define TCG_TARGET_HAS_sextract_i32 0
21
+{
20
+#define TCG_TARGET_HAS_extract_i32 1
22
+ TCGContext *s = ctx->tcg;
21
+#define TCG_TARGET_HAS_sextract_i32 1
22
#define TCG_TARGET_HAS_extract2_i32 0
23
#define TCG_TARGET_HAS_negsetcond_i32 1
24
#define TCG_TARGET_HAS_add2_i32 1
25
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
26
#define TCG_TARGET_HAS_ctz_i64 0
27
#define TCG_TARGET_HAS_ctpop_i64 0
28
#define TCG_TARGET_HAS_deposit_i64 0
29
-#define TCG_TARGET_HAS_extract_i64 0
30
-#define TCG_TARGET_HAS_sextract_i64 0
31
+#define TCG_TARGET_HAS_extract_i64 1
32
+#define TCG_TARGET_HAS_sextract_i64 1
33
#define TCG_TARGET_HAS_extract2_i64 0
34
#define TCG_TARGET_HAS_negsetcond_i64 1
35
#define TCG_TARGET_HAS_add2_i64 1
36
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
37
38
#define TCG_TARGET_HAS_tst 1
39
40
+#define TCG_TARGET_extract_valid(type, ofs, len) \
41
+ ((type) == TCG_TYPE_I64 && (ofs) + (len) == 32)
23
+
42
+
24
+ for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
43
+#define TCG_TARGET_sextract_valid TCG_TARGET_extract_valid
25
+ TCGTemp *ts = arg_temp(op->args[i]);
26
+ if (ts && ts_is_copy(ts)) {
27
+ op->args[i] = temp_arg(find_better_copy(s, ts));
28
+ }
29
+ }
30
+}
31
+
44
+
32
/* Propagate constants and copies, fold constant expressions. */
45
#endif
33
void tcg_optimize(TCGContext *s)
46
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
34
{
47
index XXXXXXX..XXXXXXX 100644
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
--- a/tcg/sparc64/tcg-target.c.inc
36
nb_iargs = def->nb_iargs;
49
+++ b/tcg/sparc64/tcg-target.c.inc
37
}
50
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
38
init_arguments(&ctx, op, nb_oargs + nb_iargs);
51
tcg_out_mb(s, a0);
39
-
52
break;
40
- /* Do copy propagation */
53
41
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
54
+ case INDEX_op_extract_i64:
42
- TCGTemp *ts = arg_temp(op->args[i]);
55
+ tcg_debug_assert(a2 + args[3] == 32);
43
- if (ts && ts_is_copy(ts)) {
56
+ tcg_out_arithi(s, a0, a1, a2, SHIFT_SRL);
44
- op->args[i] = temp_arg(find_better_copy(s, ts));
57
+ break;
45
- }
58
+ case INDEX_op_sextract_i64:
46
- }
59
+ tcg_debug_assert(a2 + args[3] == 32);
47
+ copy_propagate(&ctx, op, nb_oargs, nb_iargs);
60
+ tcg_out_arithi(s, a0, a1, a2, SHIFT_SRA);
48
61
+ break;
49
/* For commutative operations make constant second argument */
62
+
50
switch (opc) {
63
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
64
case INDEX_op_mov_i64:
65
case INDEX_op_call: /* Always emitted via tcg_out_call. */
66
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
67
case INDEX_op_ext32u_i64:
68
case INDEX_op_ext_i32_i64:
69
case INDEX_op_extu_i32_i64:
70
+ case INDEX_op_extract_i64:
71
+ case INDEX_op_sextract_i64:
72
case INDEX_op_qemu_ld_a32_i32:
73
case INDEX_op_qemu_ld_a64_i32:
74
case INDEX_op_qemu_ld_a32_i64:
51
--
75
--
52
2.25.1
76
2.43.0
53
77
54
78
diff view generated by jsdifflib
1
From: Luis Pires <luis.pires@eldorado.org.br>
1
Trivially mirrors TCG_TARGET_HAS_{s}extract_*.
2
2
3
These will be used to implement new decimal floating point
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
instructions from Power ISA 3.1.
5
6
The remainder is now returned directly by divu128/divs128,
7
freeing up phigh to receive the high 64 bits of the quotient.
8
9
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-Id: <20211025191154.350831-4-luis.pires@eldorado.org.br>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
5
---
14
include/hw/clock.h | 6 +-
6
tcg/tci/tcg-target-has.h | 3 +++
15
include/qemu/host-utils.h | 20 ++++--
7
1 file changed, 3 insertions(+)
16
target/ppc/int_helper.c | 9 +--
17
util/host-utils.c | 133 +++++++++++++++++++++++++-------------
18
4 files changed, 108 insertions(+), 60 deletions(-)
19
8
20
diff --git a/include/hw/clock.h b/include/hw/clock.h
9
diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h
21
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/clock.h
11
--- a/tcg/tci/tcg-target-has.h
23
+++ b/include/hw/clock.h
12
+++ b/tcg/tci/tcg-target-has.h
24
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
13
@@ -XXX,XX +XXX,XX @@
25
if (clk->period == 0) {
14
26
return 0;
15
#define TCG_TARGET_HAS_tst 1
27
}
16
28
- /*
17
+#define TCG_TARGET_extract_valid(type, ofs, len) 1
29
- * BUG: when CONFIG_INT128 is not defined, the current implementation of
18
+#define TCG_TARGET_sextract_valid(type, ofs, len) 1
30
- * divu128 does not return a valid truncated quotient, so the result will
31
- * be wrong.
32
- */
33
+
19
+
34
divu128(&lo, &hi, clk->period);
35
return lo;
36
}
37
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/include/qemu/host-utils.h
40
+++ b/include/qemu/host-utils.h
41
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
42
return (__int128_t)a * b / c;
43
}
44
45
-static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
46
+static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
47
+ uint64_t divisor)
48
{
49
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
50
__uint128_t result = dividend / divisor;
51
+
52
*plow = result;
53
- *phigh = dividend % divisor;
54
+ *phigh = result >> 64;
55
+ return dividend % divisor;
56
}
57
58
-static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
59
+static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
60
+ int64_t divisor)
61
{
62
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
63
+ __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
64
__int128_t result = dividend / divisor;
65
+
66
*plow = result;
67
- *phigh = dividend % divisor;
68
+ *phigh = result >> 64;
69
+ return dividend % divisor;
70
}
71
#else
72
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
73
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
74
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
75
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
76
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
77
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
78
79
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
80
{
81
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/ppc/int_helper.c
84
+++ b/target/ppc/int_helper.c
85
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
86
87
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
88
{
89
- int64_t rt = 0;
90
+ uint64_t rt = 0;
91
int64_t ra = (int64_t)rau;
92
int64_t rb = (int64_t)rbu;
93
int overflow = 0;
94
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
95
int cr;
96
uint64_t lo_value;
97
uint64_t hi_value;
98
+ uint64_t rem;
99
ppc_avr_t ret = { .u64 = { 0, 0 } };
100
101
if (b->VsrSD(0) < 0) {
102
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
103
* In that case, we leave r unchanged.
104
*/
105
} else {
106
- divu128(&lo_value, &hi_value, 1000000000000000ULL);
107
+ rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
108
109
- for (i = 1; i < 16; hi_value /= 10, i++) {
110
- bcd_put_digit(&ret, hi_value % 10, i);
111
+ for (i = 1; i < 16; rem /= 10, i++) {
112
+ bcd_put_digit(&ret, rem % 10, i);
113
}
114
115
for (; i < 32; lo_value /= 10, i++) {
116
diff --git a/util/host-utils.c b/util/host-utils.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/util/host-utils.c
119
+++ b/util/host-utils.c
120
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
121
}
122
123
/*
124
- * Unsigned 128-by-64 division. Returns quotient via plow and
125
- * remainder via phigh.
126
- * The result must fit in 64 bits (plow) - otherwise, the result
127
- * is undefined.
128
- * This function will cause a division by zero if passed a zero divisor.
129
+ * Unsigned 128-by-64 division.
130
+ * Returns the remainder.
131
+ * Returns quotient via plow and phigh.
132
+ * Also returns the remainder via the function return value.
133
*/
134
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
135
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
136
{
137
uint64_t dhi = *phigh;
138
uint64_t dlo = *plow;
139
- unsigned i;
140
- uint64_t carry = 0;
141
+ uint64_t rem, dhighest;
142
+ int sh;
143
144
if (divisor == 0 || dhi == 0) {
145
*plow = dlo / divisor;
146
- *phigh = dlo % divisor;
147
+ *phigh = 0;
148
+ return dlo % divisor;
149
} else {
150
+ sh = clz64(divisor);
151
152
- for (i = 0; i < 64; i++) {
153
- carry = dhi >> 63;
154
- dhi = (dhi << 1) | (dlo >> 63);
155
- if (carry || (dhi >= divisor)) {
156
- dhi -= divisor;
157
- carry = 1;
158
- } else {
159
- carry = 0;
160
+ if (dhi < divisor) {
161
+ if (sh != 0) {
162
+ /* normalize the divisor, shifting the dividend accordingly */
163
+ divisor <<= sh;
164
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
165
+ dlo <<= sh;
166
}
167
- dlo = (dlo << 1) | carry;
168
+
169
+ *phigh = 0;
170
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
171
+ } else {
172
+ if (sh != 0) {
173
+ /* normalize the divisor, shifting the dividend accordingly */
174
+ divisor <<= sh;
175
+ dhighest = dhi >> (64 - sh);
176
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
177
+ dlo <<= sh;
178
+
179
+ *phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
180
+ } else {
181
+ /**
182
+ * dhi >= divisor
183
+ * Since the MSB of divisor is set (sh == 0),
184
+ * (dhi - divisor) < divisor
185
+ *
186
+ * Thus, the high part of the quotient is 1, and we can
187
+ * calculate the low part with a single call to udiv_qrnnd
188
+ * after subtracting divisor from dhi
189
+ */
190
+ dhi -= divisor;
191
+ *phigh = 1;
192
+ }
193
+
194
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
195
}
196
197
- *plow = dlo;
198
- *phigh = dhi;
199
+ /*
200
+ * since the dividend/divisor might have been normalized,
201
+ * the remainder might also have to be shifted back
202
+ */
203
+ return rem >> sh;
204
}
205
}
206
207
/*
208
- * Signed 128-by-64 division. Returns quotient via plow and
209
- * remainder via phigh.
210
- * The result must fit in 64 bits (plow) - otherwise, the result
211
- * is undefined.
212
- * This function will cause a division by zero if passed a zero divisor.
213
+ * Signed 128-by-64 division.
214
+ * Returns quotient via plow and phigh.
215
+ * Also returns the remainder via the function return value.
216
*/
217
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
218
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
219
{
220
- int sgn_dvdnd = *phigh < 0;
221
- int sgn_divsr = divisor < 0;
222
+ bool neg_quotient = false, neg_remainder = false;
223
+ uint64_t unsig_hi = *phigh, unsig_lo = *plow;
224
+ uint64_t rem;
225
226
- if (sgn_dvdnd) {
227
- *plow = ~(*plow);
228
- *phigh = ~(*phigh);
229
- if (*plow == (int64_t)-1) {
230
+ if (*phigh < 0) {
231
+ neg_quotient = !neg_quotient;
232
+ neg_remainder = !neg_remainder;
233
+
234
+ if (unsig_lo == 0) {
235
+ unsig_hi = -unsig_hi;
236
+ } else {
237
+ unsig_hi = ~unsig_hi;
238
+ unsig_lo = -unsig_lo;
239
+ }
240
+ }
241
+
242
+ if (divisor < 0) {
243
+ neg_quotient = !neg_quotient;
244
+
245
+ divisor = -divisor;
246
+ }
247
+
248
+ rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
249
+
250
+ if (neg_quotient) {
251
+ if (unsig_lo == 0) {
252
+ *phigh = -unsig_hi;
253
*plow = 0;
254
- (*phigh)++;
255
- } else {
256
- (*plow)++;
257
- }
258
+ } else {
259
+ *phigh = ~unsig_hi;
260
+ *plow = -unsig_lo;
261
+ }
262
+ } else {
263
+ *phigh = unsig_hi;
264
+ *plow = unsig_lo;
265
}
266
267
- if (sgn_divsr) {
268
- divisor = 0 - divisor;
269
- }
270
-
271
- divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
272
-
273
- if (sgn_dvdnd ^ sgn_divsr) {
274
- *plow = 0 - *plow;
275
+ if (neg_remainder) {
276
+ return -rem;
277
+ } else {
278
+ return rem;
279
}
280
}
281
#endif
20
#endif
282
--
21
--
283
2.25.1
22
2.43.0
284
23
285
24
diff view generated by jsdifflib
1
There was no real reason for calls to have separate code here.
1
We already have these assertions during opcode creation.
2
Unify init for calls vs non-calls using the call path, which
3
handles TCG_CALL_DUMMY_ARG.
4
2
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
5
---
10
tcg/optimize.c | 25 +++++++++++--------------
6
tcg/tci/tcg-target.c.inc | 20 ++------------------
11
1 file changed, 11 insertions(+), 14 deletions(-)
7
1 file changed, 2 insertions(+), 18 deletions(-)
12
8
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
14
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/optimize.c
11
--- a/tcg/tci/tcg-target.c.inc
16
+++ b/tcg/optimize.c
12
+++ b/tcg/tci/tcg-target.c.inc
17
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
18
}
14
break;
19
}
15
20
16
CASE_32_64(deposit) /* Optional (TCG_TARGET_HAS_deposit_*). */
21
-static void init_arg_info(OptContext *ctx, TCGArg arg)
17
- {
22
-{
18
- TCGArg pos = args[3], len = args[4];
23
- init_ts_info(ctx, arg_temp(arg));
19
- TCGArg max = opc == INDEX_op_deposit_i32 ? 32 : 64;
24
-}
25
-
20
-
26
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
21
- tcg_debug_assert(pos < max);
27
{
22
- tcg_debug_assert(pos + len <= max);
28
TCGTemp *i, *g, *l;
23
-
29
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
24
- tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], pos, len);
30
return false;
25
- }
31
}
26
+ tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], args[3], args[4]);
32
27
break;
33
+static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
28
34
+{
29
CASE_32_64(extract) /* Optional (TCG_TARGET_HAS_extract_*). */
35
+ for (int i = 0; i < nb_args; i++) {
30
CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */
36
+ TCGTemp *ts = arg_temp(op->args[i]);
31
- {
37
+ if (ts) {
32
- TCGArg pos = args[2], len = args[3];
38
+ init_ts_info(ctx, ts);
33
- TCGArg max = type == TCG_TYPE_I32 ? 32 : 64;
39
+ }
34
-
40
+ }
35
- tcg_debug_assert(pos < max);
41
+}
36
- tcg_debug_assert(pos + len <= max);
42
+
37
-
43
/* Propagate constants and copies, fold constant expressions. */
38
- tcg_out_op_rrbb(s, opc, args[0], args[1], pos, len);
44
void tcg_optimize(TCGContext *s)
39
- }
45
{
40
+ tcg_out_op_rrbb(s, opc, args[0], args[1], args[2], args[3]);
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
41
break;
47
if (opc == INDEX_op_call) {
42
48
nb_oargs = TCGOP_CALLO(op);
43
CASE_32_64(brcond)
49
nb_iargs = TCGOP_CALLI(op);
50
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
51
- TCGTemp *ts = arg_temp(op->args[i]);
52
- if (ts) {
53
- init_ts_info(&ctx, ts);
54
- }
55
- }
56
} else {
57
nb_oargs = def->nb_oargs;
58
nb_iargs = def->nb_iargs;
59
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
60
- init_arg_info(&ctx, op->args[i]);
61
- }
62
}
63
+ init_arguments(&ctx, op, nb_oargs + nb_iargs);
64
65
/* Do copy propagation */
66
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
67
--
44
--
68
2.25.1
45
2.43.0
69
46
70
47
diff view generated by jsdifflib
1
Certain targets, like riscv, produce signed 32-bit results.
1
Make extract and sextract "unconditional" in the sense
2
This can lead to lots of redundant extensions as values are
2
that the opcodes are always present. Rely instead on
3
manipulated.
3
TCG_TARGET_HAS_{s}extract_valid, now always defined.
4
4
5
Begin by tracking only the obvious sign-extensions, and
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
converting them to simple copies when possible.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
7
---
12
tcg/optimize.c | 123 ++++++++++++++++++++++++++++++++++++++++---------
8
tcg/aarch64/tcg-target-has.h | 4 ----
13
1 file changed, 102 insertions(+), 21 deletions(-)
9
tcg/arm/tcg-target-has.h | 2 --
10
tcg/i386/tcg-target-has.h | 4 ----
11
tcg/loongarch64/tcg-target-has.h | 4 ----
12
tcg/mips/tcg-target-has.h | 4 ----
13
tcg/ppc/tcg-target-has.h | 4 ----
14
tcg/riscv/tcg-target-has.h | 4 ----
15
tcg/s390x/tcg-target-has.h | 4 ----
16
tcg/sparc64/tcg-target-has.h | 4 ----
17
tcg/tcg-has.h | 12 ------------
18
tcg/tci/tcg-target-has.h | 4 ----
19
tcg/optimize.c | 8 ++++----
20
tcg/tcg.c | 12 ++++--------
21
tcg/tci.c | 8 --------
22
14 files changed, 8 insertions(+), 70 deletions(-)
14
23
24
diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/aarch64/tcg-target-has.h
27
+++ b/tcg/aarch64/tcg-target-has.h
28
@@ -XXX,XX +XXX,XX @@
29
#define TCG_TARGET_HAS_ctz_i32 1
30
#define TCG_TARGET_HAS_ctpop_i32 0
31
#define TCG_TARGET_HAS_deposit_i32 1
32
-#define TCG_TARGET_HAS_extract_i32 1
33
-#define TCG_TARGET_HAS_sextract_i32 1
34
#define TCG_TARGET_HAS_extract2_i32 1
35
#define TCG_TARGET_HAS_negsetcond_i32 1
36
#define TCG_TARGET_HAS_add2_i32 1
37
@@ -XXX,XX +XXX,XX @@
38
#define TCG_TARGET_HAS_ctz_i64 1
39
#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 1
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target-has.h
+++ b/tcg/arm/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target-has.h
+++ b/tcg/i386/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ctz_i32 1
#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ctz_i64 1
#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 1
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target-has.h
+++ b/tcg/loongarch64/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_div2_i32 0
#define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_div2_i64 0
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_extr_i64_i32 1
#define TCG_TARGET_HAS_ext8s_i64 1
diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target-has.h
+++ b/tcg/mips/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;

/* optional instructions detected at runtime */
#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target-has.h
+++ b/tcg/ppc/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target-has.h
+++ b/tcg/riscv/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_div2_i32 0
#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB)
#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_add2_i32 1
#define TCG_TARGET_HAS_sub2_i32 1
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_div2_i64 0
#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB)
#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_extr_i64_i32 1
#define TCG_TARGET_HAS_ext8s_i64 1
diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target-has.h
+++ b/tcg/s390x/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_ctz_i32 0
#define TCG_TARGET_HAS_ctpop_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_ctz_i64 0
#define TCG_TARGET_HAS_ctpop_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target-has.h
+++ b/tcg/sparc64/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_ctz_i32 0
#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_ctz_i64 0
#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-has.h
+++ b/tcg/tcg-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ctz_i64 0
#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_extract_i64 0
-#define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 0
#define TCG_TARGET_HAS_add2_i64 0
@@ -XXX,XX +XXX,XX @@
#ifndef TCG_TARGET_deposit_i64_valid
#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
#endif
-#ifndef TCG_TARGET_extract_valid
-#define TCG_TARGET_extract_valid(type, ofs, len) \
- ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_extract_i32 \
- : TCG_TARGET_HAS_extract_i64)
-#endif
-#ifndef TCG_TARGET_sextract_valid
-#define TCG_TARGET_sextract_valid(type, ofs, len) \
- ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_sextract_i32 \
- : TCG_TARGET_HAS_sextract_i64)
-#endif

/* Only one of DIV or DIV2 should be defined. */
#if defined(TCG_TARGET_HAS_div_i32)
diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target-has.h
+++ b/tcg/tci/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ext16u_i32 1
#define TCG_TARGET_HAS_andc_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_eqv_i32 1
#define TCG_TARGET_HAS_nand_i32 1
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rem_i64 1
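With extract and sextract now unconditionally provided by every backend, it helps to keep their reference semantics at hand. A minimal stand-alone sketch in plain C (the *_ref names are illustrative; QEMU's own helpers are the extract64/sextract64 family):

#include <stdint.h>

/* Zero-extended field extraction: LEN bits of VALUE starting at POS.
 * Assumes 0 < len && pos + len <= 64, as the opcodes require. */
static inline uint64_t extract64_ref(uint64_t value, unsigned pos, unsigned len)
{
    return (value >> pos) & (~0ull >> (64 - len));
}

/* Sign-extended field extraction: the same field, with its top bit
 * replicated into all higher result bits. */
static inline int64_t sextract64_ref(uint64_t value, unsigned pos, unsigned len)
{
    return (int64_t)(value << (64 - len - pos)) >> (64 - len);
}

For example, sextract64_ref(0x80, 0, 8) yields -128, which is exactly what an ext8s operation computes.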
diff --git a/tcg/optimize.c b/tcg/optimize.c
265
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
266
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
267
--- a/tcg/optimize.c
18
+++ b/tcg/optimize.c
268
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
269
@@ -XXX,XX +XXX,XX @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
20
TCGTemp *next_copy;
270
shr_opc = INDEX_op_shr_i32;
21
uint64_t val;
271
neg_opc = INDEX_op_neg_i32;
22
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
272
if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) {
23
+ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
273
- uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
24
} TempOptInfo;
274
+ uext_opc = INDEX_op_extract_i32;
25
26
typedef struct OptContext {
27
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
28
/* In flight values from optimization. */
29
uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
30
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
31
+ uint64_t s_mask; /* mask of clrsb(value) bits */
32
TCGType type;
33
} OptContext;
34
35
+/* Calculate the smask for a specific value. */
36
+static uint64_t smask_from_value(uint64_t value)
37
+{
38
+ int rep = clrsb64(value);
39
+ return ~(~0ull >> rep);
40
+}
41
+
42
+/*
43
+ * Calculate the smask for a given set of known-zeros.
44
+ * If there are lots of zeros on the left, we can consider the remainder
45
+ * an unsigned field, and thus the corresponding signed field is one bit
46
+ * larger.
47
+ */
48
+static uint64_t smask_from_zmask(uint64_t zmask)
49
+{
50
+ /*
51
+ * Only the 0 bits are significant for zmask, thus the msb itself
52
+ * must be zero, else we have no sign information.
53
+ */
54
+ int rep = clz64(zmask);
55
+ if (rep == 0) {
56
+ return 0;
57
+ }
58
+ rep -= 1;
59
+ return ~(~0ull >> rep);
60
+}
61
+
62
static inline TempOptInfo *ts_info(TCGTemp *ts)
63
{
64
return ts->state_ptr;
65
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
66
ti->prev_copy = ts;
67
ti->is_const = false;
68
ti->z_mask = -1;
69
+ ti->s_mask = 0;
70
}
71
72
static void reset_temp(TCGArg arg)
73
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
74
ti->is_const = true;
75
ti->val = ts->val;
76
ti->z_mask = ts->val;
77
+ ti->s_mask = smask_from_value(ts->val);
78
} else {
79
ti->is_const = false;
80
ti->z_mask = -1;
81
+ ti->s_mask = 0;
82
}
83
}
84
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
86
op->args[1] = src;
87
88
di->z_mask = si->z_mask;
89
+ di->s_mask = si->s_mask;
90
91
if (src_ts->type == dst_ts->type) {
92
TempOptInfo *ni = ts_info(si->next_copy);
93
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
94
95
nb_oargs = def->nb_oargs;
96
for (i = 0; i < nb_oargs; i++) {
97
- reset_temp(op->args[i]);
98
+ TCGTemp *ts = arg_temp(op->args[i]);
99
+ reset_ts(ts);
100
/*
101
- * Save the corresponding known-zero bits mask for the
102
+ * Save the corresponding known-zero/sign bits mask for the
103
* first output argument (only one supported so far).
104
*/
105
if (i == 0) {
106
- arg_info(op->args[i])->z_mask = ctx->z_mask;
107
+ ts_info(ts)->z_mask = ctx->z_mask;
108
+ ts_info(ts)->s_mask = ctx->s_mask;
109
}
275
}
110
}
276
if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) {
111
}
277
- sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
112
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
278
+ sext_opc = INDEX_op_sextract_i32;
113
{
279
}
114
uint64_t a_mask = ctx->a_mask;
280
break;
115
uint64_t z_mask = ctx->z_mask;
281
case TCG_TYPE_I64:
116
+ uint64_t s_mask = ctx->s_mask;
282
@@ -XXX,XX +XXX,XX @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
117
283
shr_opc = INDEX_op_shr_i64;
118
/*
284
neg_opc = INDEX_op_neg_i64;
119
* 32-bit ops generate 32-bit results, which for the purpose of
285
if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) {
120
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
286
- uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
121
if (ctx->type == TCG_TYPE_I32) {
287
+ uext_opc = INDEX_op_extract_i64;
122
a_mask = (int32_t)a_mask;
288
}
123
z_mask = (int32_t)z_mask;
289
if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) {
124
+ s_mask |= MAKE_64BIT_MASK(32, 32);
290
- sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
125
ctx->z_mask = z_mask;
291
+ sext_opc = INDEX_op_sextract_i64;
126
+ ctx->s_mask = s_mask;
127
}
128
129
if (z_mask == 0) {
130
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
131
132
static bool fold_bswap(OptContext *ctx, TCGOp *op)
133
{
134
- uint64_t z_mask, sign;
135
+ uint64_t z_mask, s_mask, sign;
136
137
if (arg_is_const(op->args[1])) {
138
uint64_t t = arg_info(op->args[1])->val;
139
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
140
}
141
142
z_mask = arg_info(op->args[1])->z_mask;
143
+
144
switch (op->opc) {
145
case INDEX_op_bswap16_i32:
146
case INDEX_op_bswap16_i64:
147
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
148
default:
149
g_assert_not_reached();
150
}
151
+ s_mask = smask_from_zmask(z_mask);
152
153
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
154
case TCG_BSWAP_OZ:
155
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
156
/* If the sign bit may be 1, force all the bits above to 1. */
157
if (z_mask & sign) {
158
z_mask |= sign;
159
+ s_mask = sign << 1;
160
}
292
}
161
break;
293
break;
162
default:
294
default:
163
/* The high bits are undefined: force all bits above the sign to 1. */
295
diff --git a/tcg/tcg.c b/tcg/tcg.c
164
z_mask |= sign << 1;
296
index XXXXXXX..XXXXXXX 100644
165
+ s_mask = 0;
297
--- a/tcg/tcg.c
166
break;
298
+++ b/tcg/tcg.c
167
}
299
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
168
ctx->z_mask = z_mask;
300
case INDEX_op_shl_i32:
169
+ ctx->s_mask = s_mask;
301
case INDEX_op_shr_i32:
170
302
case INDEX_op_sar_i32:
171
return fold_masks(ctx, op);
303
+ case INDEX_op_extract_i32:
172
}
304
+ case INDEX_op_sextract_i32:
173
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
174
static bool fold_extract(OptContext *ctx, TCGOp *op)
175
{
176
uint64_t z_mask_old, z_mask;
177
+ int pos = op->args[2];
178
+ int len = op->args[3];
179
180
if (arg_is_const(op->args[1])) {
181
uint64_t t;
182
183
t = arg_info(op->args[1])->val;
184
- t = extract64(t, op->args[2], op->args[3]);
185
+ t = extract64(t, pos, len);
186
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
187
}
188
189
z_mask_old = arg_info(op->args[1])->z_mask;
190
- z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
191
- if (op->args[2] == 0) {
192
+ z_mask = extract64(z_mask_old, pos, len);
193
+ if (pos == 0) {
194
ctx->a_mask = z_mask_old ^ z_mask;
195
}
196
ctx->z_mask = z_mask;
197
+ ctx->s_mask = smask_from_zmask(z_mask);
198
199
return fold_masks(ctx, op);
200
}
201
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
202
203
static bool fold_exts(OptContext *ctx, TCGOp *op)
204
{
205
- uint64_t z_mask_old, z_mask, sign;
206
+ uint64_t s_mask_old, s_mask, z_mask, sign;
207
bool type_change = false;
208
209
if (fold_const1(ctx, op)) {
210
return true;
305
return true;
211
}
306
212
307
case INDEX_op_negsetcond_i32:
213
- z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
308
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
214
+ z_mask = arg_info(op->args[1])->z_mask;
309
return TCG_TARGET_HAS_rot_i32;
215
+ s_mask = arg_info(op->args[1])->s_mask;
310
case INDEX_op_deposit_i32:
216
+ s_mask_old = s_mask;
311
return TCG_TARGET_HAS_deposit_i32;
217
312
- case INDEX_op_extract_i32:
218
switch (op->opc) {
313
- return TCG_TARGET_HAS_extract_i32;
219
CASE_OP_32_64(ext8s):
314
- case INDEX_op_sextract_i32:
220
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
315
- return TCG_TARGET_HAS_sextract_i32;
221
316
case INDEX_op_extract2_i32:
222
if (z_mask & sign) {
317
return TCG_TARGET_HAS_extract2_i32;
223
z_mask |= sign;
318
case INDEX_op_add2_i32:
224
- } else if (!type_change) {
319
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
225
- ctx->a_mask = z_mask_old ^ z_mask;
320
case INDEX_op_sar_i64:
226
}
321
case INDEX_op_ext_i32_i64:
227
+ s_mask |= sign << 1;
322
case INDEX_op_extu_i32_i64:
228
+
323
+ case INDEX_op_extract_i64:
229
ctx->z_mask = z_mask;
324
+ case INDEX_op_sextract_i64:
230
+ ctx->s_mask = s_mask;
325
return TCG_TARGET_REG_BITS == 64;
231
+ if (!type_change) {
326
232
+ ctx->a_mask = s_mask & ~s_mask_old;
327
case INDEX_op_negsetcond_i64:
233
+ }
328
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
234
329
return TCG_TARGET_HAS_rot_i64;
235
return fold_masks(ctx, op);
330
case INDEX_op_deposit_i64:
236
}
331
return TCG_TARGET_HAS_deposit_i64;
237
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
332
- case INDEX_op_extract_i64:
238
}
333
- return TCG_TARGET_HAS_extract_i64;
239
334
- case INDEX_op_sextract_i64:
240
ctx->z_mask = z_mask;
335
- return TCG_TARGET_HAS_sextract_i64;
241
+ ctx->s_mask = smask_from_zmask(z_mask);
336
case INDEX_op_extract2_i64:
242
if (!type_change) {
337
return TCG_TARGET_HAS_extract2_i64;
243
ctx->a_mask = z_mask_old ^ z_mask;
338
case INDEX_op_extrl_i64_i32:
244
}
339
diff --git a/tcg/tci.c b/tcg/tci.c
245
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
340
index XXXXXXX..XXXXXXX 100644
246
MemOp mop = get_memop(oi);
341
--- a/tcg/tci.c
247
int width = 8 * memop_size(mop);
342
+++ b/tcg/tci.c
248
343
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
249
- if (!(mop & MO_SIGN) && width < 64) {
344
regs[r0] = deposit32(regs[r1], pos, len, regs[r2]);
250
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
345
break;
251
+ if (width < 64) {
346
#endif
252
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
347
-#if TCG_TARGET_HAS_extract_i32
253
+ if (!(mop & MO_SIGN)) {
348
case INDEX_op_extract_i32:
254
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
349
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
255
+ ctx->s_mask <<= 1;
350
regs[r0] = extract32(regs[r1], pos, len);
256
+ }
351
break;
257
}
352
-#endif
258
353
-#if TCG_TARGET_HAS_sextract_i32
259
/* Opcodes that touch guest memory stop the mb optimization. */
354
case INDEX_op_sextract_i32:
260
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
355
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
261
356
regs[r0] = sextract32(regs[r1], pos, len);
262
static bool fold_sextract(OptContext *ctx, TCGOp *op)
357
break;
263
{
358
-#endif
264
- int64_t z_mask_old, z_mask;
359
case INDEX_op_brcond_i32:
265
+ uint64_t z_mask, s_mask, s_mask_old;
360
tci_args_rl(insn, tb_ptr, &r0, &ptr);
266
+ int pos = op->args[2];
361
if ((uint32_t)regs[r0]) {
267
+ int len = op->args[3];
362
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
268
363
regs[r0] = deposit64(regs[r1], pos, len, regs[r2]);
269
if (arg_is_const(op->args[1])) {
364
break;
270
uint64_t t;
365
#endif
271
366
-#if TCG_TARGET_HAS_extract_i64
272
t = arg_info(op->args[1])->val;
367
case INDEX_op_extract_i64:
273
- t = sextract64(t, op->args[2], op->args[3]);
368
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
274
+ t = sextract64(t, pos, len);
369
regs[r0] = extract64(regs[r1], pos, len);
275
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
370
break;
276
}
371
-#endif
277
372
-#if TCG_TARGET_HAS_sextract_i64
278
- z_mask_old = arg_info(op->args[1])->z_mask;
373
case INDEX_op_sextract_i64:
279
- z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
374
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
280
- if (op->args[2] == 0 && z_mask >= 0) {
375
regs[r0] = sextract64(regs[r1], pos, len);
281
- ctx->a_mask = z_mask_old ^ z_mask;
376
break;
282
- }
377
-#endif
283
+ z_mask = arg_info(op->args[1])->z_mask;
378
case INDEX_op_brcond_i64:
284
+ z_mask = sextract64(z_mask, pos, len);
379
tci_args_rl(insn, tb_ptr, &r0, &ptr);
285
ctx->z_mask = z_mask;
380
if (regs[r0]) {
286
287
+ s_mask_old = arg_info(op->args[1])->s_mask;
288
+ s_mask = sextract64(s_mask_old, pos, len);
289
+ s_mask |= MAKE_64BIT_MASK(len, 64 - len);
290
+ ctx->s_mask = s_mask;
291
+
292
+ if (pos == 0) {
293
+ ctx->a_mask = s_mask & ~s_mask_old;
294
+ }
295
+
296
return fold_masks(ctx, op);
297
}
298
299
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
300
{
301
/* We can't do any folding with a load, but we can record bits. */
302
switch (op->opc) {
303
+ CASE_OP_32_64(ld8s):
304
+ ctx->s_mask = MAKE_64BIT_MASK(8, 56);
305
+ break;
306
CASE_OP_32_64(ld8u):
307
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
308
+ ctx->s_mask = MAKE_64BIT_MASK(9, 55);
309
+ break;
310
+ CASE_OP_32_64(ld16s):
311
+ ctx->s_mask = MAKE_64BIT_MASK(16, 48);
312
break;
313
CASE_OP_32_64(ld16u):
314
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
315
+ ctx->s_mask = MAKE_64BIT_MASK(17, 47);
316
+ break;
317
+ case INDEX_op_ld32s_i64:
318
+ ctx->s_mask = MAKE_64BIT_MASK(32, 32);
319
break;
320
case INDEX_op_ld32u_i64:
321
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
322
+ ctx->s_mask = MAKE_64BIT_MASK(33, 31);
323
break;
324
default:
325
g_assert_not_reached();
326
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
327
ctx.type = TCG_TYPE_I32;
328
}
329
330
- /* Assume all bits affected, and no bits known zero. */
331
+ /* Assume all bits affected, no bits known zero, no sign reps. */
332
ctx.a_mask = -1;
333
ctx.z_mask = -1;
334
+ ctx.s_mask = 0;
335
336
/*
337
* Process each opcode.
338
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
339
case INDEX_op_extrh_i64_i32:
340
done = fold_extu(&ctx, op);
341
break;
342
+ CASE_OP_32_64(ld8s):
343
CASE_OP_32_64(ld8u):
344
+ CASE_OP_32_64(ld16s):
345
CASE_OP_32_64(ld16u):
346
+ case INDEX_op_ld32s_i64:
347
case INDEX_op_ld32u_i64:
348
done = fold_tcg_ld(&ctx, op);
349
break;
350
--
2.25.1

--
2.43.0
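As a companion to the s_mask bookkeeping added to tcg/optimize.c above, here is a self-contained sketch of the two mask helpers; clrsb64_ref stands in for QEMU's clrsb64, and main() is only an illustrative check:

#include <stdint.h>
#include <stdio.h>

/* Leading redundant sign bits, not counting the sign bit itself. */
static int clrsb64_ref(uint64_t v)
{
    uint64_t t = v ^ (uint64_t)((int64_t)v >> 1);
    return t ? __builtin_clzll(t) - 1 : 63;
}

/* A left-aligned mask of the bits known to replicate the sign bit. */
static uint64_t smask_from_value(uint64_t value)
{
    return ~(~0ull >> clrsb64_ref(value));
}

/* If the msb of zmask is known-zero, the value fits in an unsigned
 * field, so the corresponding signed field is one bit wider. */
static uint64_t smask_from_zmask(uint64_t zmask)
{
    int rep = zmask ? __builtin_clzll(zmask) : 64;
    return rep ? ~(~0ull >> (rep - 1)) : 0;
}

int main(void)
{
    /* 0xff: 55 redundant sign bits, so the top 55 bits are sign bits. */
    printf("%016llx\n", (unsigned long long)smask_from_value(0xff));
    /* zmask 0xff: 56 known-zero leading bits, i.e. a 9-bit signed field. */
    printf("%016llx\n", (unsigned long long)smask_from_zmask(0xff));
    return 0;
}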
Provide what will become a larger context for splitting
the very large tcg_optimize function.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 77 ++++++++++++++++++++++++++------------------------
1 file changed, 40 insertions(+), 37 deletions(-)

Make deposit "unconditional" in the sense that the opcode is
always present. Rely instead on TCG_TARGET_deposit_valid,
now always defined.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/aarch64/tcg-target-has.h | 3 +--
tcg/arm/tcg-target-has.h | 2 +-
tcg/i386/tcg-target-has.h | 5 +----
tcg/loongarch64/tcg-target-has.h | 3 +--
tcg/mips/tcg-target-has.h | 3 +--
tcg/ppc/tcg-target-has.h | 3 +--
tcg/riscv/tcg-target-has.h | 4 ++--
tcg/s390x/tcg-target-has.h | 3 +--
tcg/sparc64/tcg-target-has.h | 4 ++--
tcg/tcg-has.h | 8 --------
tcg/tci/tcg-target-has.h | 3 +--
tcg/tcg-op.c | 22 +++++++++++-----------
tcg/tcg.c | 31 +++++++++++--------------------
tcg/tci.c | 4 ----
tcg/tci/tcg-target.c.inc | 2 +-
15 files changed, 35 insertions(+), 65 deletions(-)
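To make the deposit contract concrete: when TCG_TARGET_deposit_valid() reports false, tcg-op.c synthesizes the operation from shift/and/or. A reference model of the semantics a backend must match, as a sketch (illustrative name, not the QEMU API):

#include <stdint.h>

/* Replace LEN bits of DST starting at OFS with the low LEN bits of SRC.
 * This is what the shift/and/or fallback in tcg-op.c computes when the
 * backend cannot do it in one instruction. */
static uint64_t deposit64_ref(uint64_t dst, unsigned ofs, unsigned len,
                              uint64_t src)
{
    uint64_t mask = (len == 64 ? ~0ull : (1ull << len) - 1) << ofs;
    return (dst & ~mask) | ((src << ofs) & mask);
}

Note that tcg_op_deposit_valid() in the tcg.c hunk below asserts exactly the preconditions this model assumes: ofs < width, len > 0 and len <= width - ofs.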
diff --git a/tcg/optimize.c b/tcg/optimize.c
25
diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h
13
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
27
--- a/tcg/aarch64/tcg-target-has.h
15
+++ b/tcg/optimize.c
28
+++ b/tcg/aarch64/tcg-target-has.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
29
@@ -XXX,XX +XXX,XX @@
17
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
30
#define TCG_TARGET_HAS_clz_i32 1
18
} TempOptInfo;
31
#define TCG_TARGET_HAS_ctz_i32 1
19
32
#define TCG_TARGET_HAS_ctpop_i32 0
20
+typedef struct OptContext {
33
-#define TCG_TARGET_HAS_deposit_i32 1
21
+ TCGTempSet temps_used;
34
#define TCG_TARGET_HAS_extract2_i32 1
22
+} OptContext;
35
#define TCG_TARGET_HAS_negsetcond_i32 1
36
#define TCG_TARGET_HAS_add2_i32 1
37
@@ -XXX,XX +XXX,XX @@
38
#define TCG_TARGET_HAS_clz_i64 1
39
#define TCG_TARGET_HAS_ctz_i64 1
40
#define TCG_TARGET_HAS_ctpop_i64 0
41
-#define TCG_TARGET_HAS_deposit_i64 1
42
#define TCG_TARGET_HAS_extract2_i64 1
43
#define TCG_TARGET_HAS_negsetcond_i64 1
44
#define TCG_TARGET_HAS_add2_i64 1
45
@@ -XXX,XX +XXX,XX @@
46
47
#define TCG_TARGET_extract_valid(type, ofs, len) 1
48
#define TCG_TARGET_sextract_valid(type, ofs, len) 1
49
+#define TCG_TARGET_deposit_valid(type, ofs, len) 1
50
51
#endif
52
diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tcg/arm/tcg-target-has.h
55
+++ b/tcg/arm/tcg-target-has.h
56
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
57
#define TCG_TARGET_HAS_clz_i32 1
58
#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
59
#define TCG_TARGET_HAS_ctpop_i32 0
60
-#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
61
#define TCG_TARGET_HAS_extract2_i32 1
62
#define TCG_TARGET_HAS_negsetcond_i32 1
63
#define TCG_TARGET_HAS_mulu2_i32 1
64
@@ -XXX,XX +XXX,XX @@ tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
65
66
#define TCG_TARGET_extract_valid tcg_target_extract_valid
67
#define TCG_TARGET_sextract_valid tcg_target_extract_valid
68
+#define TCG_TARGET_deposit_valid(type, ofs, len) use_armv7_instructions
69
70
#endif
71
diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
72
index XXXXXXX..XXXXXXX 100644
73
--- a/tcg/i386/tcg-target-has.h
74
+++ b/tcg/i386/tcg-target-has.h
75
@@ -XXX,XX +XXX,XX @@
76
#define TCG_TARGET_HAS_clz_i32 1
77
#define TCG_TARGET_HAS_ctz_i32 1
78
#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
79
-#define TCG_TARGET_HAS_deposit_i32 1
80
#define TCG_TARGET_HAS_extract2_i32 1
81
#define TCG_TARGET_HAS_negsetcond_i32 1
82
#define TCG_TARGET_HAS_add2_i32 1
83
@@ -XXX,XX +XXX,XX @@
84
#define TCG_TARGET_HAS_clz_i64 1
85
#define TCG_TARGET_HAS_ctz_i64 1
86
#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
87
-#define TCG_TARGET_HAS_deposit_i64 1
88
#define TCG_TARGET_HAS_extract2_i64 1
89
#define TCG_TARGET_HAS_negsetcond_i64 1
90
#define TCG_TARGET_HAS_add2_i64 1
91
@@ -XXX,XX +XXX,XX @@
92
#define TCG_TARGET_HAS_cmpsel_vec 1
93
#define TCG_TARGET_HAS_tst_vec have_avx512bw
94
95
-#define TCG_TARGET_deposit_i32_valid(ofs, len) \
96
+#define TCG_TARGET_deposit_valid(type, ofs, len) \
97
(((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
98
(TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
99
-#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
100
101
/*
102
* Check for the possibility of low byte/word extraction, high-byte extraction
103
diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h
104
index XXXXXXX..XXXXXXX 100644
105
--- a/tcg/loongarch64/tcg-target-has.h
106
+++ b/tcg/loongarch64/tcg-target-has.h
107
@@ -XXX,XX +XXX,XX @@
108
#define TCG_TARGET_HAS_rem_i32 1
109
#define TCG_TARGET_HAS_div2_i32 0
110
#define TCG_TARGET_HAS_rot_i32 1
111
-#define TCG_TARGET_HAS_deposit_i32 1
112
#define TCG_TARGET_HAS_extract2_i32 0
113
#define TCG_TARGET_HAS_add2_i32 0
114
#define TCG_TARGET_HAS_sub2_i32 0
115
@@ -XXX,XX +XXX,XX @@
116
#define TCG_TARGET_HAS_rem_i64 1
117
#define TCG_TARGET_HAS_div2_i64 0
118
#define TCG_TARGET_HAS_rot_i64 1
119
-#define TCG_TARGET_HAS_deposit_i64 1
120
#define TCG_TARGET_HAS_extract2_i64 0
121
#define TCG_TARGET_HAS_extr_i64_i32 1
122
#define TCG_TARGET_HAS_ext8s_i64 1
123
@@ -XXX,XX +XXX,XX @@
124
#define TCG_TARGET_HAS_tst_vec 0
125
126
#define TCG_TARGET_extract_valid(type, ofs, len) 1
127
+#define TCG_TARGET_deposit_valid(type, ofs, len) 1
128
129
static inline bool
130
tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
131
diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/tcg/mips/tcg-target-has.h
134
+++ b/tcg/mips/tcg-target-has.h
135
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
136
#endif
137
138
/* optional instructions detected at runtime */
139
-#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
140
#define TCG_TARGET_HAS_extract2_i32 0
141
#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
142
#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
143
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
144
#define TCG_TARGET_HAS_bswap16_i64 1
145
#define TCG_TARGET_HAS_bswap32_i64 1
146
#define TCG_TARGET_HAS_bswap64_i64 1
147
-#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
148
#define TCG_TARGET_HAS_extract2_i64 0
149
#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
150
#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
151
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
152
#define TCG_TARGET_HAS_tst 0
153
154
#define TCG_TARGET_extract_valid(type, ofs, len) use_mips32r2_instructions
155
+#define TCG_TARGET_deposit_valid(type, ofs, len) use_mips32r2_instructions
156
157
static inline bool
158
tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
159
diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h
160
index XXXXXXX..XXXXXXX 100644
161
--- a/tcg/ppc/tcg-target-has.h
162
+++ b/tcg/ppc/tcg-target-has.h
163
@@ -XXX,XX +XXX,XX @@
164
#define TCG_TARGET_HAS_clz_i32 1
165
#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
166
#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
167
-#define TCG_TARGET_HAS_deposit_i32 1
168
#define TCG_TARGET_HAS_extract2_i32 0
169
#define TCG_TARGET_HAS_negsetcond_i32 1
170
#define TCG_TARGET_HAS_mulu2_i32 0
171
@@ -XXX,XX +XXX,XX @@
172
#define TCG_TARGET_HAS_clz_i64 1
173
#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
174
#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
175
-#define TCG_TARGET_HAS_deposit_i64 1
176
#define TCG_TARGET_HAS_extract2_i64 0
177
#define TCG_TARGET_HAS_negsetcond_i64 1
178
#define TCG_TARGET_HAS_add2_i64 1
179
@@ -XXX,XX +XXX,XX @@
180
#define TCG_TARGET_HAS_tst_vec 0
181
182
#define TCG_TARGET_extract_valid(type, ofs, len) 1
183
+#define TCG_TARGET_deposit_valid(type, ofs, len) 1
184
185
static inline bool
186
tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
187
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
188
index XXXXXXX..XXXXXXX 100644
189
--- a/tcg/riscv/tcg-target-has.h
190
+++ b/tcg/riscv/tcg-target-has.h
191
@@ -XXX,XX +XXX,XX @@
192
#define TCG_TARGET_HAS_rem_i32 1
193
#define TCG_TARGET_HAS_div2_i32 0
194
#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB)
195
-#define TCG_TARGET_HAS_deposit_i32 0
196
#define TCG_TARGET_HAS_extract2_i32 0
197
#define TCG_TARGET_HAS_add2_i32 1
198
#define TCG_TARGET_HAS_sub2_i32 1
199
@@ -XXX,XX +XXX,XX @@
200
#define TCG_TARGET_HAS_rem_i64 1
201
#define TCG_TARGET_HAS_div2_i64 0
202
#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB)
203
-#define TCG_TARGET_HAS_deposit_i64 0
204
#define TCG_TARGET_HAS_extract2_i64 0
205
#define TCG_TARGET_HAS_extr_i64_i32 1
206
#define TCG_TARGET_HAS_ext8s_i64 1
207
@@ -XXX,XX +XXX,XX @@ tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
208
}
209
#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
210
211
+#define TCG_TARGET_deposit_valid(type, ofs, len) 0
23
+
212
+
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
213
#endif
25
{
214
diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h
26
return ts->state_ptr;
215
index XXXXXXX..XXXXXXX 100644
27
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
216
--- a/tcg/s390x/tcg-target-has.h
28
}
217
+++ b/tcg/s390x/tcg-target-has.h
29
218
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
30
/* Initialize and activate a temporary. */
219
#define TCG_TARGET_HAS_clz_i32 0
31
-static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
220
#define TCG_TARGET_HAS_ctz_i32 0
32
+static void init_ts_info(OptContext *ctx, TCGTemp *ts)
221
#define TCG_TARGET_HAS_ctpop_i32 1
33
{
222
-#define TCG_TARGET_HAS_deposit_i32 1
34
size_t idx = temp_idx(ts);
223
#define TCG_TARGET_HAS_extract2_i32 0
35
TempOptInfo *ti;
224
#define TCG_TARGET_HAS_negsetcond_i32 1
36
225
#define TCG_TARGET_HAS_add2_i32 1
37
- if (test_bit(idx, temps_used->l)) {
226
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
38
+ if (test_bit(idx, ctx->temps_used.l)) {
227
#define TCG_TARGET_HAS_clz_i64 1
228
#define TCG_TARGET_HAS_ctz_i64 0
229
#define TCG_TARGET_HAS_ctpop_i64 1
230
-#define TCG_TARGET_HAS_deposit_i64 1
231
#define TCG_TARGET_HAS_extract2_i64 0
232
#define TCG_TARGET_HAS_negsetcond_i64 1
233
#define TCG_TARGET_HAS_add2_i64 1
234
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
235
#define TCG_TARGET_HAS_tst_vec 0
236
237
#define TCG_TARGET_extract_valid(type, ofs, len) 1
238
+#define TCG_TARGET_deposit_valid(type, ofs, len) 1
239
240
static inline bool
241
tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
242
diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h
243
index XXXXXXX..XXXXXXX 100644
244
--- a/tcg/sparc64/tcg-target-has.h
245
+++ b/tcg/sparc64/tcg-target-has.h
246
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
247
#define TCG_TARGET_HAS_clz_i32 0
248
#define TCG_TARGET_HAS_ctz_i32 0
249
#define TCG_TARGET_HAS_ctpop_i32 0
250
-#define TCG_TARGET_HAS_deposit_i32 0
251
#define TCG_TARGET_HAS_extract2_i32 0
252
#define TCG_TARGET_HAS_negsetcond_i32 1
253
#define TCG_TARGET_HAS_add2_i32 1
254
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
255
#define TCG_TARGET_HAS_clz_i64 0
256
#define TCG_TARGET_HAS_ctz_i64 0
257
#define TCG_TARGET_HAS_ctpop_i64 0
258
-#define TCG_TARGET_HAS_deposit_i64 0
259
#define TCG_TARGET_HAS_extract2_i64 0
260
#define TCG_TARGET_HAS_negsetcond_i64 1
261
#define TCG_TARGET_HAS_add2_i64 1
262
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
263
264
#define TCG_TARGET_sextract_valid TCG_TARGET_extract_valid
265
266
+#define TCG_TARGET_deposit_valid(type, ofs, len) 0
267
+
268
#endif
269
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
270
index XXXXXXX..XXXXXXX 100644
271
--- a/tcg/tcg-has.h
272
+++ b/tcg/tcg-has.h
273
@@ -XXX,XX +XXX,XX @@
274
#define TCG_TARGET_HAS_clz_i64 0
275
#define TCG_TARGET_HAS_ctz_i64 0
276
#define TCG_TARGET_HAS_ctpop_i64 0
277
-#define TCG_TARGET_HAS_deposit_i64 0
278
#define TCG_TARGET_HAS_extract2_i64 0
279
#define TCG_TARGET_HAS_negsetcond_i64 0
280
#define TCG_TARGET_HAS_add2_i64 0
281
@@ -XXX,XX +XXX,XX @@
282
#define TCG_TARGET_HAS_sub2_i32 1
283
#endif
284
285
-#ifndef TCG_TARGET_deposit_i32_valid
286
-#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
287
-#endif
288
-#ifndef TCG_TARGET_deposit_i64_valid
289
-#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
290
-#endif
291
-
292
/* Only one of DIV or DIV2 should be defined. */
293
#if defined(TCG_TARGET_HAS_div_i32)
294
#define TCG_TARGET_HAS_div2_i32 0
295
diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h
296
index XXXXXXX..XXXXXXX 100644
297
--- a/tcg/tci/tcg-target-has.h
298
+++ b/tcg/tci/tcg-target-has.h
299
@@ -XXX,XX +XXX,XX @@
300
#define TCG_TARGET_HAS_ext8u_i32 1
301
#define TCG_TARGET_HAS_ext16u_i32 1
302
#define TCG_TARGET_HAS_andc_i32 1
303
-#define TCG_TARGET_HAS_deposit_i32 1
304
#define TCG_TARGET_HAS_extract2_i32 0
305
#define TCG_TARGET_HAS_eqv_i32 1
306
#define TCG_TARGET_HAS_nand_i32 1
307
@@ -XXX,XX +XXX,XX @@
308
#define TCG_TARGET_HAS_bswap16_i64 1
309
#define TCG_TARGET_HAS_bswap32_i64 1
310
#define TCG_TARGET_HAS_bswap64_i64 1
311
-#define TCG_TARGET_HAS_deposit_i64 1
312
#define TCG_TARGET_HAS_extract2_i64 0
313
#define TCG_TARGET_HAS_div_i64 1
314
#define TCG_TARGET_HAS_rem_i64 1
315
@@ -XXX,XX +XXX,XX @@
316
317
#define TCG_TARGET_extract_valid(type, ofs, len) 1
318
#define TCG_TARGET_sextract_valid(type, ofs, len) 1
319
+#define TCG_TARGET_deposit_valid(type, ofs, len) 1
320
321
#endif
322
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
323
index XXXXXXX..XXXXXXX 100644
324
--- a/tcg/tcg-op.c
325
+++ b/tcg/tcg-op.c
326
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
327
tcg_gen_mov_i32(ret, arg2);
39
return;
328
return;
40
}
329
}
41
- set_bit(idx, temps_used->l);
330
- if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
42
+ set_bit(idx, ctx->temps_used.l);
331
+ if (TCG_TARGET_deposit_valid(TCG_TYPE_I32, ofs, len)) {
43
332
tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
44
ti = ts->state_ptr;
333
return;
45
if (ti == NULL) {
46
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
47
}
334
}
335
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
336
tcg_gen_shli_i32(ret, arg, ofs);
337
} else if (ofs == 0) {
338
tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
339
- } else if (TCG_TARGET_HAS_deposit_i32
340
- && TCG_TARGET_deposit_i32_valid(ofs, len)) {
341
+ } else if (TCG_TARGET_deposit_valid(TCG_TYPE_I32, ofs, len)) {
342
TCGv_i32 zero = tcg_constant_i32(0);
343
tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
344
} else {
345
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
346
tcg_gen_mov_i64(ret, arg2);
347
return;
348
}
349
- if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
350
- tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
351
- return;
352
- }
353
354
- if (TCG_TARGET_REG_BITS == 32) {
355
+ if (TCG_TARGET_REG_BITS == 64) {
356
+ if (TCG_TARGET_deposit_valid(TCG_TYPE_I64, ofs, len)) {
357
+ tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
358
+ return;
359
+ }
360
+ } else {
361
if (ofs >= 32) {
362
tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
363
TCGV_LOW(arg2), ofs - 32, len);
364
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
365
tcg_gen_shli_i64(ret, arg, ofs);
366
} else if (ofs == 0) {
367
tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
368
- } else if (TCG_TARGET_HAS_deposit_i64
369
- && TCG_TARGET_deposit_i64_valid(ofs, len)) {
370
+ } else if (TCG_TARGET_REG_BITS == 64 &&
371
+ TCG_TARGET_deposit_valid(TCG_TYPE_I64, ofs, len)) {
372
TCGv_i64 zero = tcg_constant_i64(0);
373
tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
374
} else {
375
@@ -XXX,XX +XXX,XX @@ void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
376
tcg_gen_extu_i32_i64(dest, low);
377
/* If deposit is available, use it. Otherwise use the extra
378
knowledge that we have of the zero-extensions above. */
379
- if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
380
+ if (TCG_TARGET_deposit_valid(TCG_TYPE_I64, 32, 32)) {
381
tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
382
} else {
383
tcg_gen_shli_i64(tmp, tmp, 32);
384
diff --git a/tcg/tcg.c b/tcg/tcg.c
385
index XXXXXXX..XXXXXXX 100644
386
--- a/tcg/tcg.c
387
+++ b/tcg/tcg.c
388
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
389
case INDEX_op_sar_i32:
390
case INDEX_op_extract_i32:
391
case INDEX_op_sextract_i32:
392
+ case INDEX_op_deposit_i32:
393
return true;
394
395
case INDEX_op_negsetcond_i32:
396
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
397
case INDEX_op_rotl_i32:
398
case INDEX_op_rotr_i32:
399
return TCG_TARGET_HAS_rot_i32;
400
- case INDEX_op_deposit_i32:
401
- return TCG_TARGET_HAS_deposit_i32;
402
case INDEX_op_extract2_i32:
403
return TCG_TARGET_HAS_extract2_i32;
404
case INDEX_op_add2_i32:
405
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
406
case INDEX_op_extu_i32_i64:
407
case INDEX_op_extract_i64:
408
case INDEX_op_sextract_i64:
409
+ case INDEX_op_deposit_i64:
410
return TCG_TARGET_REG_BITS == 64;
411
412
case INDEX_op_negsetcond_i64:
413
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
414
case INDEX_op_rotl_i64:
415
case INDEX_op_rotr_i64:
416
return TCG_TARGET_HAS_rot_i64;
417
- case INDEX_op_deposit_i64:
418
- return TCG_TARGET_HAS_deposit_i64;
419
case INDEX_op_extract2_i64:
420
return TCG_TARGET_HAS_extract2_i64;
421
case INDEX_op_extrl_i64_i32:
422
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
423
424
bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
425
{
426
+ unsigned width;
427
+
428
+ tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
429
+ width = (type == TCG_TYPE_I32 ? 32 : 64);
430
+
431
+ tcg_debug_assert(ofs < width);
432
tcg_debug_assert(len > 0);
433
- switch (type) {
434
- case TCG_TYPE_I32:
435
- tcg_debug_assert(ofs < 32);
436
- tcg_debug_assert(len <= 32);
437
- tcg_debug_assert(ofs + len <= 32);
438
- return TCG_TARGET_HAS_deposit_i32 &&
439
- TCG_TARGET_deposit_i32_valid(ofs, len);
440
- case TCG_TYPE_I64:
441
- tcg_debug_assert(ofs < 64);
442
- tcg_debug_assert(len <= 64);
443
- tcg_debug_assert(ofs + len <= 64);
444
- return TCG_TARGET_HAS_deposit_i64 &&
445
- TCG_TARGET_deposit_i64_valid(ofs, len);
446
- default:
447
- g_assert_not_reached();
448
- }
449
+ tcg_debug_assert(len <= width - ofs);
450
+
451
+ return TCG_TARGET_deposit_valid(type, ofs, len);
48
}
452
}
49
453
50
-static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
454
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
51
+static void init_arg_info(OptContext *ctx, TCGArg arg)
455
diff --git a/tcg/tci.c b/tcg/tci.c
52
{
456
index XXXXXXX..XXXXXXX 100644
53
- init_ts_info(temps_used, arg_temp(arg));
457
--- a/tcg/tci.c
54
+ init_ts_info(ctx, arg_temp(arg));
458
+++ b/tcg/tci.c
55
}
459
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
56
460
regs[r0] = ror32(regs[r1], regs[r2] & 31);
57
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
58
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
59
}
60
}
61
62
-static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
63
+static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
64
TCGOp *op, TCGArg dst, uint64_t val)
65
{
66
const TCGOpDef *def = &tcg_op_defs[op->opc];
67
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
68
69
/* Convert movi to mov with constant temp. */
70
tv = tcg_constant_internal(type, val);
71
- init_ts_info(temps_used, tv);
72
+ init_ts_info(ctx, tv);
73
tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
74
}
75
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
{
78
int nb_temps, nb_globals, i;
79
TCGOp *op, *op_next, *prev_mb = NULL;
80
- TCGTempSet temps_used;
81
+ OptContext ctx = {};
82
83
/* Array VALS has an element for each temp.
84
If this temp holds a constant then its value is kept in VALS' element.
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
nb_temps = s->nb_temps;
87
nb_globals = s->nb_globals;
88
89
- memset(&temps_used, 0, sizeof(temps_used));
90
for (i = 0; i < nb_temps; ++i) {
91
s->temps[i].state_ptr = NULL;
92
}
93
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
94
for (i = 0; i < nb_oargs + nb_iargs; i++) {
95
TCGTemp *ts = arg_temp(op->args[i]);
96
if (ts) {
97
- init_ts_info(&temps_used, ts);
98
+ init_ts_info(&ctx, ts);
99
}
100
}
101
} else {
102
nb_oargs = def->nb_oargs;
103
nb_iargs = def->nb_iargs;
104
for (i = 0; i < nb_oargs + nb_iargs; i++) {
105
- init_arg_info(&temps_used, op->args[i]);
106
+ init_arg_info(&ctx, op->args[i]);
107
}
108
}
109
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64(rotr):
112
if (arg_is_const(op->args[1])
113
&& arg_info(op->args[1])->val == 0) {
114
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
115
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
116
continue;
117
}
118
break;
461
break;
119
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
462
#endif
120
463
-#if TCG_TARGET_HAS_deposit_i32
121
if (partmask == 0) {
464
case INDEX_op_deposit_i32:
122
tcg_debug_assert(nb_oargs == 1);
465
tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
123
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
466
regs[r0] = deposit32(regs[r1], pos, len, regs[r2]);
124
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
125
continue;
126
}
127
if (affected == 0) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
129
CASE_OP_32_64(mulsh):
130
if (arg_is_const(op->args[2])
131
&& arg_info(op->args[2])->val == 0) {
132
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
133
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
134
continue;
135
}
136
break;
467
break;
137
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
468
-#endif
138
CASE_OP_32_64_VEC(sub):
469
case INDEX_op_extract_i32:
139
CASE_OP_32_64_VEC(xor):
470
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
140
if (args_are_copies(op->args[1], op->args[2])) {
471
regs[r0] = extract32(regs[r1], pos, len);
141
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
472
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
142
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
473
regs[r0] = ror64(regs[r1], regs[r2] & 63);
143
continue;
144
}
145
break;
474
break;
146
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
475
#endif
147
if (arg_is_const(op->args[1])) {
476
-#if TCG_TARGET_HAS_deposit_i64
148
tmp = arg_info(op->args[1])->val;
477
case INDEX_op_deposit_i64:
149
tmp = dup_const(TCGOP_VECE(op), tmp);
478
tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
150
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
479
regs[r0] = deposit64(regs[r1], pos, len, regs[r2]);
151
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
480
break;
152
break;
481
-#endif
153
}
482
case INDEX_op_extract_i64:
154
goto do_default;
483
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
155
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
484
regs[r0] = extract64(regs[r1], pos, len);
156
case INDEX_op_dup2_vec:
485
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
157
assert(TCG_TARGET_REG_BITS == 32);
486
index XXXXXXX..XXXXXXX 100644
158
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
487
--- a/tcg/tci/tcg-target.c.inc
159
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
488
+++ b/tcg/tci/tcg-target.c.inc
160
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0],
489
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
161
deposit64(arg_info(op->args[1])->val, 32, 32,
490
tcg_out_op_rrr(s, opc, args[0], args[1], args[2]);
162
arg_info(op->args[2])->val));
491
break;
163
break;
492
164
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
493
- CASE_32_64(deposit) /* Optional (TCG_TARGET_HAS_deposit_*). */
165
case INDEX_op_extrh_i64_i32:
494
+ CASE_32_64(deposit)
166
if (arg_is_const(op->args[1])) {
495
tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], args[3], args[4]);
167
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
496
break;
168
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
497
169
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
break;
171
}
172
goto do_default;
173
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
174
if (arg_is_const(op->args[1])) {
175
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
176
op->args[2]);
177
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
178
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
break;
180
}
181
goto do_default;
182
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
183
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
184
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
185
arg_info(op->args[2])->val);
186
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
187
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
break;
189
}
190
goto do_default;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
TCGArg v = arg_info(op->args[1])->val;
193
if (v != 0) {
194
tmp = do_constant_folding(opc, v, 0);
195
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
196
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
} else {
198
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
199
}
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
tmp = deposit64(arg_info(op->args[1])->val,
202
op->args[3], op->args[4],
203
arg_info(op->args[2])->val);
204
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
205
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
206
break;
207
}
208
goto do_default;
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
if (arg_is_const(op->args[1])) {
211
tmp = extract64(arg_info(op->args[1])->val,
212
op->args[2], op->args[3]);
213
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
214
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
215
break;
216
}
217
goto do_default;
218
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
219
if (arg_is_const(op->args[1])) {
220
tmp = sextract64(arg_info(op->args[1])->val,
221
op->args[2], op->args[3]);
222
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
223
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
224
break;
225
}
226
goto do_default;
227
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
228
tmp = (int32_t)(((uint32_t)v1 >> shr) |
229
((uint32_t)v2 << (32 - shr)));
230
}
231
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
232
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
233
break;
234
}
235
goto do_default;
236
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
237
tmp = do_constant_folding_cond(opc, op->args[1],
238
op->args[2], op->args[3]);
239
if (tmp != 2) {
240
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
241
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
242
break;
243
}
244
goto do_default;
245
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
246
op->args[1], op->args[2]);
247
if (tmp != 2) {
248
if (tmp) {
249
- memset(&temps_used, 0, sizeof(temps_used));
250
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
251
op->opc = INDEX_op_br;
252
op->args[0] = op->args[3];
253
} else {
254
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
255
256
rl = op->args[0];
257
rh = op->args[1];
258
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
259
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
260
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
261
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
262
break;
263
}
264
goto do_default;
265
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
266
267
rl = op->args[0];
268
rh = op->args[1];
269
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
270
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
271
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
272
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
273
break;
274
}
275
goto do_default;
276
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
277
if (tmp != 2) {
278
if (tmp) {
279
do_brcond_true:
280
- memset(&temps_used, 0, sizeof(temps_used));
281
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
282
op->opc = INDEX_op_br;
283
op->args[0] = op->args[5];
284
} else {
285
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
286
/* Simplify LT/GE comparisons vs zero to a single compare
287
vs the high word of the input. */
288
do_brcond_high:
289
- memset(&temps_used, 0, sizeof(temps_used));
290
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
op->opc = INDEX_op_brcond_i32;
292
op->args[0] = op->args[1];
293
op->args[1] = op->args[3];
294
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
295
goto do_default;
296
}
297
do_brcond_low:
298
- memset(&temps_used, 0, sizeof(temps_used));
299
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
300
op->opc = INDEX_op_brcond_i32;
301
op->args[1] = op->args[2];
302
op->args[2] = op->args[4];
303
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
304
op->args[5]);
305
if (tmp != 2) {
306
do_setcond_const:
307
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
308
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
309
} else if ((op->args[5] == TCG_COND_LT
310
|| op->args[5] == TCG_COND_GE)
311
&& arg_is_const(op->args[3])
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (!(tcg_call_flags(op)
314
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
315
for (i = 0; i < nb_globals; i++) {
316
- if (test_bit(i, temps_used.l)) {
317
+ if (test_bit(i, ctx.temps_used.l)) {
318
reset_ts(&s->temps[i]);
319
}
320
}
321
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
322
block, otherwise we only trash the output args. "z_mask" is
323
the non-zero bits mask for the first output arg. */
324
if (def->flags & TCG_OPF_BB_END) {
325
- memset(&temps_used, 0, sizeof(temps_used));
326
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
327
} else {
328
do_reset_output:
329
for (i = 0; i < nb_oargs; i++) {
330
--
2.25.1

--
2.43.0
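The tcg_gen_concat_i32_i64() hunk above chooses between a deposit and a shift/or fallback that exploits the known zero-extension; both reduce to the same computation, sketched here in plain C (illustrative name):

#include <stdint.h>

/* Concatenate two 32-bit halves into a 64-bit value: the deposit form
 * deposit(low, 32, 32, high) and the shli+or fallback agree on this. */
static uint64_t concat32_ref(uint32_t low, uint32_t high)
{
    return (uint64_t)low | ((uint64_t)high << 32);
}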
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20250102181601.1421059-2-richard.henderson@linaro.org>
---
host/include/riscv/host/cpuinfo.h | 5 +++--
util/cpuinfo-riscv.c | 18 ++++++++++++++++--
2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/host/include/riscv/host/cpuinfo.h b/host/include/riscv/host/cpuinfo.h

Rather than try to keep these up-to-date across folding,
re-read nb_oargs at the end, after re-reading the opcode.

A couple of asserts need dropping, but that will take care
of itself as we split the function further.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
12
--- a/host/include/riscv/host/cpuinfo.h
17
+++ b/tcg/optimize.c
13
+++ b/host/include/riscv/host/cpuinfo.h
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
14
@@ -XXX,XX +XXX,XX @@
19
15
#define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */
20
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
16
#define CPUINFO_ZBA (1u << 1)
21
uint64_t z_mask, partmask, affected, tmp;
17
#define CPUINFO_ZBB (1u << 2)
22
- int nb_oargs, nb_iargs;
18
-#define CPUINFO_ZICOND (1u << 3)
23
TCGOpcode opc = op->opc;
19
-#define CPUINFO_ZVE64X (1u << 4)
24
const TCGOpDef *def;
20
+#define CPUINFO_ZBS (1u << 3)
25
21
+#define CPUINFO_ZICOND (1u << 4)
26
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
22
+#define CPUINFO_ZVE64X (1u << 5)
23
24
/* Initialized with a constructor. */
25
extern unsigned cpuinfo;
26
diff --git a/util/cpuinfo-riscv.c b/util/cpuinfo-riscv.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/util/cpuinfo-riscv.c
29
+++ b/util/cpuinfo-riscv.c
30
@@ -XXX,XX +XXX,XX @@ static void sigill_handler(int signo, siginfo_t *si, void *data)
31
/* Called both as constructor and (possibly) via other constructors. */
32
unsigned __attribute__((constructor)) cpuinfo_init(void)
33
{
34
- unsigned left = CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZICOND | CPUINFO_ZVE64X;
35
+ unsigned left = CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZBS
36
+ | CPUINFO_ZICOND | CPUINFO_ZVE64X;
37
unsigned info = cpuinfo;
38
39
if (info) {
40
@@ -XXX,XX +XXX,XX @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
41
#if defined(__riscv_arch_test) && defined(__riscv_zbb)
42
info |= CPUINFO_ZBB;
43
#endif
44
+#if defined(__riscv_arch_test) && defined(__riscv_zbs)
45
+ info |= CPUINFO_ZBS;
46
+#endif
47
#if defined(__riscv_arch_test) && defined(__riscv_zicond)
48
info |= CPUINFO_ZICOND;
49
#endif
50
@@ -XXX,XX +XXX,XX @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
51
&& pair.key >= 0) {
52
info |= pair.value & RISCV_HWPROBE_EXT_ZBA ? CPUINFO_ZBA : 0;
53
info |= pair.value & RISCV_HWPROBE_EXT_ZBB ? CPUINFO_ZBB : 0;
54
- left &= ~(CPUINFO_ZBA | CPUINFO_ZBB);
55
+ info |= pair.value & RISCV_HWPROBE_EXT_ZBS ? CPUINFO_ZBS : 0;
56
+ left &= ~(CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZBS);
57
#ifdef RISCV_HWPROBE_EXT_ZICOND
58
info |= pair.value & RISCV_HWPROBE_EXT_ZICOND ? CPUINFO_ZICOND : 0;
59
left &= ~CPUINFO_ZICOND;
60
@@ -XXX,XX +XXX,XX @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
61
left &= ~CPUINFO_ZBB;
27
}
62
}
28
63
29
def = &tcg_op_defs[opc];
64
+ if (left & CPUINFO_ZBS) {
30
- nb_oargs = def->nb_oargs;
65
+ /* Probe for Zbs: bext zero,zero,zero. */
31
- nb_iargs = def->nb_iargs;
66
+ got_sigill = 0;
32
- init_arguments(&ctx, op, nb_oargs + nb_iargs);
67
+ asm volatile(".insn r 0x33, 5, 0x24, zero, zero, zero"
33
- copy_propagate(&ctx, op, nb_oargs, nb_iargs);
68
+ : : : "memory");
34
+ init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
69
+ info |= got_sigill ? 0 : CPUINFO_ZBS;
35
+ copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
70
+ left &= ~CPUINFO_ZBS;
36
71
+ }
37
/* For commutative operations make constant second argument */
72
+
38
switch (opc) {
73
if (left & CPUINFO_ZICOND) {
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
74
/* Probe for Zicond: czero.eqz zero,zero,zero. */
40
75
got_sigill = 0;
41
CASE_OP_32_64(qemu_ld):
42
{
43
- MemOpIdx oi = op->args[nb_oargs + nb_iargs];
44
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
45
MemOp mop = get_memop(oi);
46
if (!(mop & MO_SIGN)) {
47
z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
}
50
51
if (partmask == 0) {
52
- tcg_debug_assert(nb_oargs == 1);
53
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
54
continue;
55
}
56
if (affected == 0) {
57
- tcg_debug_assert(nb_oargs == 1);
58
tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
59
continue;
60
}
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
} else if (args_are_copies(op->args[1], op->args[2])) {
63
op->opc = INDEX_op_dup_vec;
64
TCGOP_VECE(op) = MO_32;
65
- nb_iargs = 1;
66
}
67
break;
68
69
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
70
op->opc = opc = (opc == INDEX_op_movcond_i32
71
? INDEX_op_setcond_i32
72
: INDEX_op_setcond_i64);
73
- nb_iargs = 2;
74
}
75
break;
76
77
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
78
if (def->flags & TCG_OPF_BB_END) {
79
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
80
} else {
81
+ int nb_oargs = def->nb_oargs;
82
for (i = 0; i < nb_oargs; i++) {
83
reset_temp(op->args[i]);
84
/* Save the corresponding known-zero bits mask for the
85
--
76
--
86
2.25.1
77
2.43.0
87
78
88
79
diff view generated by jsdifflib
1
Pull the "op r, a, a => movi r, 0" optimization into a function,
1
Acked-by: Alistair Francis <alistair.francis@wdc.com>
2
and use it in the outer opcode fold functions.
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Message-ID: <20250102181601.1421059-3-richard.henderson@linaro.org>
4
---
5
tcg/riscv/tcg-target-has.h | 8 +++++++-
6
tcg/riscv/tcg-target.c.inc | 11 +++++++++--
7
2 files changed, 16 insertions(+), 3 deletions(-)
3
8
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 41 ++++++++++++++++++++++++-----------------
9
1 file changed, 24 insertions(+), 17 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
11
--- a/tcg/riscv/tcg-target-has.h
14
+++ b/tcg/optimize.c
12
+++ b/tcg/riscv/tcg-target-has.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
16
return false;
14
/* ofs > 0 uses SRLIW; ofs == 0 uses add.uw. */
17
}
15
return ofs || (cpuinfo & CPUINFO_ZBA);
18
16
}
19
+/* If the binary operation has both arguments equal, fold to @i. */
17
- return (cpuinfo & CPUINFO_ZBB) && ofs == 0 && len == 16;
20
+static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
18
+ switch (len) {
21
+{
19
+ case 1:
22
+ if (args_are_copies(op->args[1], op->args[2])) {
20
+ return (cpuinfo & CPUINFO_ZBS) && ofs != 0;
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
21
+ case 16:
24
+ }
22
+ return (cpuinfo & CPUINFO_ZBB) && ofs == 0;
25
+ return false;
26
+}
27
+
28
/*
29
* These outermost fold_<op> functions are sorted alphabetically.
30
*/
31
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
32
33
static bool fold_andc(OptContext *ctx, TCGOp *op)
34
{
35
- return fold_const2(ctx, op);
36
+ if (fold_const2(ctx, op) ||
37
+ fold_xx_to_i(ctx, op, 0)) {
38
+ return true;
39
+ }
23
+ }
40
+ return false;
24
+ return false;
41
}
25
}
42
26
#define TCG_TARGET_extract_valid tcg_target_extract_valid
43
static bool fold_brcond(OptContext *ctx, TCGOp *op)
27
44
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
28
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
45
29
index XXXXXXX..XXXXXXX 100644
46
static bool fold_sub(OptContext *ctx, TCGOp *op)
30
--- a/tcg/riscv/tcg-target.c.inc
47
{
31
+++ b/tcg/riscv/tcg-target.c.inc
48
- return fold_const2(ctx, op);
32
@@ -XXX,XX +XXX,XX @@ typedef enum {
49
+ if (fold_const2(ctx, op) ||
33
OPC_ANDI = 0x7013,
50
+ fold_xx_to_i(ctx, op, 0)) {
34
OPC_AUIPC = 0x17,
51
+ return true;
35
OPC_BEQ = 0x63,
52
+ }
36
+ OPC_BEXTI = 0x48005013,
53
+ return false;
37
OPC_BGE = 0x5063,
54
}
38
OPC_BGEU = 0x7063,
55
39
OPC_BLT = 0x4063,
56
static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
40
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
57
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
58
59
static bool fold_xor(OptContext *ctx, TCGOp *op)
60
{
61
- return fold_const2(ctx, op);
62
+ if (fold_const2(ctx, op) ||
63
+ fold_xx_to_i(ctx, op, 0)) {
64
+ return true;
65
+ }
66
+ return false;
67
}
68
69
/* Propagate constants and copies, fold constant expressions. */
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
71
break;
72
}
41
}
73
42
/* FALLTHRU */
74
- /* Simplify expression for "op r, a, a => movi r, 0" cases */
43
case INDEX_op_extract_i32:
75
- switch (opc) {
44
- if (a2 == 0 && args[3] == 16) {
76
- CASE_OP_32_64_VEC(andc):
45
+ switch (args[3]) {
77
- CASE_OP_32_64_VEC(sub):
46
+ case 1:
78
- CASE_OP_32_64_VEC(xor):
47
+ tcg_out_opc_imm(s, OPC_BEXTI, a0, a1, a2);
79
- if (args_are_copies(op->args[1], op->args[2])) {
48
+ break;
80
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
49
+ case 16:
81
- continue;
50
+ tcg_debug_assert(a2 == 0);
82
- }
51
tcg_out_ext16u(s, a0, a1);
83
- break;
52
- } else {
84
- default:
53
+ break;
85
- break;
54
+ default:
86
- }
55
g_assert_not_reached();
87
-
56
}
88
/*
57
break;
89
* Process each opcode.
90
* Sorted alphabetically by opcode as much as possible.
91
--
58
--
92
2.25.1
59
2.43.0
93
94
diff view generated by jsdifflib
1
Calls are special in that they have a variable number
1
From: Helge Deller <deller@kernel.org>
2
of arguments, and need to be able to clobber globals.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Add some missing fields which may be parsed by userspace applications.
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
5
Signed-off-by: Helge Deller <deller@gmx.de>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-ID: <Z39B1wzNNpndmOxZ@p100>
7
---
9
---
8
tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++------------------
10
linux-user/sparc/target_proc.h | 20 +++++++++++++++++++-
9
1 file changed, 41 insertions(+), 22 deletions(-)
11
1 file changed, 19 insertions(+), 1 deletion(-)
10
12
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/linux-user/sparc/target_proc.h b/linux-user/sparc/target_proc.h
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
15
--- a/linux-user/sparc/target_proc.h
14
+++ b/tcg/optimize.c
16
+++ b/linux-user/sparc/target_proc.h
15
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
17
@@ -XXX,XX +XXX,XX @@
16
}
18
17
}
19
static int open_cpuinfo(CPUArchState *cpu_env, int fd)
18
20
{
19
+static bool fold_call(OptContext *ctx, TCGOp *op)
21
- dprintf(fd, "type\t\t: sun4u\n");
20
+{
22
+ int i, num_cpus;
21
+ TCGContext *s = ctx->tcg;
23
+ const char *cpu_type;
22
+ int nb_oargs = TCGOP_CALLO(op);
23
+ int nb_iargs = TCGOP_CALLI(op);
24
+ int flags, i;
25
+
24
+
26
+ init_arguments(ctx, op, nb_oargs + nb_iargs);
25
+ num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
27
+ copy_propagate(ctx, op, nb_oargs, nb_iargs);
26
+ if (cpu_env->def.features & CPU_FEATURE_HYPV) {
28
+
27
+ cpu_type = "sun4v";
29
+ /* If the function reads or writes globals, reset temp data. */
28
+ } else {
30
+ flags = tcg_call_flags(op);
29
+ cpu_type = "sun4u";
31
+ if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
32
+ int nb_globals = s->nb_globals;
33
+
34
+ for (i = 0; i < nb_globals; i++) {
35
+ if (test_bit(i, ctx->temps_used.l)) {
36
+ reset_ts(&ctx->tcg->temps[i]);
37
+ }
38
+ }
39
+ }
30
+ }
40
+
31
+
41
+ /* Reset temp data for outputs. */
32
+ dprintf(fd, "cpu\t\t: %s (QEMU)\n", cpu_env->def.name);
42
+ for (i = 0; i < nb_oargs; i++) {
33
+ dprintf(fd, "type\t\t: %s\n", cpu_type);
43
+ reset_temp(op->args[i]);
34
+ dprintf(fd, "ncpus probed\t: %d\n", num_cpus);
35
+ dprintf(fd, "ncpus active\t: %d\n", num_cpus);
36
+ dprintf(fd, "State:\n");
37
+ for (i = 0; i < num_cpus; i++) {
38
+ dprintf(fd, "CPU%d:\t\t: online\n", i);
44
+ }
39
+ }
45
+
40
+
46
+ /* Stop optimizing MB across calls. */
41
return 0;
47
+ ctx->prev_mb = NULL;
42
}
48
+ return true;
43
#define HAVE_ARCH_PROC_CPUINFO
49
+}
50
+
51
/* Propagate constants and copies, fold constant expressions. */
52
void tcg_optimize(TCGContext *s)
53
{
54
- int nb_temps, nb_globals, i;
55
+ int nb_temps, i;
56
TCGOp *op, *op_next;
57
OptContext ctx = { .tcg = s };
58
59
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
60
available through the doubly linked circular list. */
61
62
nb_temps = s->nb_temps;
63
- nb_globals = s->nb_globals;
64
-
65
for (i = 0; i < nb_temps; ++i) {
66
s->temps[i].state_ptr = NULL;
67
}
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
uint64_t z_mask, partmask, affected, tmp;
70
int nb_oargs, nb_iargs;
71
TCGOpcode opc = op->opc;
72
- const TCGOpDef *def = &tcg_op_defs[opc];
73
+ const TCGOpDef *def;
74
75
- /* Count the arguments, and initialize the temps that are
76
- going to be used */
77
+ /* Calls are special. */
78
if (opc == INDEX_op_call) {
79
- nb_oargs = TCGOP_CALLO(op);
80
- nb_iargs = TCGOP_CALLI(op);
81
- } else {
82
- nb_oargs = def->nb_oargs;
83
- nb_iargs = def->nb_iargs;
84
+ fold_call(&ctx, op);
85
+ continue;
86
}
87
+
88
+ def = &tcg_op_defs[opc];
89
+ nb_oargs = def->nb_oargs;
90
+ nb_iargs = def->nb_iargs;
91
init_arguments(&ctx, op, nb_oargs + nb_iargs);
92
copy_propagate(&ctx, op, nb_oargs, nb_iargs);
93
94
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
95
if (def->flags & TCG_OPF_BB_END) {
96
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
97
} else {
98
- if (opc == INDEX_op_call &&
99
- !(tcg_call_flags(op)
100
- & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
101
- for (i = 0; i < nb_globals; i++) {
102
- if (test_bit(i, ctx.temps_used.l)) {
103
- reset_ts(&s->temps[i]);
104
- }
105
- }
106
- }
107
-
108
for (i = 0; i < nb_oargs; i++) {
109
reset_temp(op->args[i]);
110
/* Save the corresponding known-zero bits mask for the
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
case INDEX_op_qemu_st_i32:
113
case INDEX_op_qemu_st8_i32:
114
case INDEX_op_qemu_st_i64:
115
- case INDEX_op_call:
116
/* Opcodes that touch guest memory stop the optimization. */
117
ctx.prev_mb = NULL;
118
break;
119
--
44
--
120
2.25.1
45
2.43.0
121
122
diff view generated by jsdifflib
1
From: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
2
2
3
Addition of not and xor on 128-bit integers.
3
These similarly named functions serve different purposes; add
4
docstrings to highlight them.
4
5
5
Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
6
Suggested-by: Alex Bennée <alex.bennee@linaro.org>
6
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
7
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
7
Message-Id: <20211025122818.168890-3-frederic.petrot@univ-grenoble-alpes.fr>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
[rth: Split out logical operations.]
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-ID: <20250116213214.5695-1-iii@linux.ibm.com>
11
---
12
---
12
include/qemu/int128.h | 20 ++++++++++++++++++++
13
include/tcg/tcg.h | 41 +++++++++++++++++++++++++++++++++++++++++
13
1 file changed, 20 insertions(+)
14
accel/tcg/cpu-exec.c | 15 ++++++++++++++-
15
2 files changed, 55 insertions(+), 1 deletion(-)
14
16
15
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
17
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/include/qemu/int128.h
19
--- a/include/tcg/tcg.h
18
+++ b/include/qemu/int128.h
20
+++ b/include/tcg/tcg.h
19
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
21
@@ -XXX,XX +XXX,XX @@ void tcg_region_reset_all(void);
20
return a;
22
size_t tcg_code_size(void);
23
size_t tcg_code_capacity(void);
24
25
+/**
26
+ * tcg_tb_insert:
27
+ * @tb: translation block to insert
28
+ *
29
+ * Insert @tb into the region trees.
30
+ */
31
void tcg_tb_insert(TranslationBlock *tb);
32
+
33
+/**
34
+ * tcg_tb_remove:
35
+ * @tb: translation block to remove
36
+ *
37
+ * Remove @tb from the region trees.
38
+ */
39
void tcg_tb_remove(TranslationBlock *tb);
40
+
41
+/**
42
+ * tcg_tb_lookup:
43
+ * @tc_ptr: host PC to look up
44
+ *
45
+ * Look up a translation block inside the region trees by @tc_ptr. This is
46
+ * useful for exception handling, but must not be used for the purposes of
47
+ * executing the returned translation block. See struct tb_tc for more
48
+ * information.
49
+ *
50
+ * Returns: a translation block previously inserted into the region trees,
51
+ * such that @tc_ptr points anywhere inside the code generated for it, or
52
+ * NULL.
53
+ */
54
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr);
55
+
56
+/**
57
+ * tcg_tb_foreach:
58
+ * @func: callback
59
+ * @user_data: opaque value to pass to @callback
60
+ *
61
+ * Call @func for each translation block inserted into the region trees.
62
+ */
63
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data);
64
+
65
+/**
66
+ * tcg_nb_tbs:
67
+ *
68
+ * Returns: the number of translation blocks inserted into the region trees.
69
+ */
70
size_t tcg_nb_tbs(void);
71
72
/* user-mode: Called with mmap_lock held. */
73
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/accel/tcg/cpu-exec.c
76
+++ b/accel/tcg/cpu-exec.c
77
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
78
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
21
}
79
}
22
80
23
+static inline Int128 int128_not(Int128 a)
81
-/* Might cause an exception, so have a longjmp destination ready */
24
+{
82
+/**
25
+ return ~a;
83
+ * tb_lookup:
26
+}
84
+ * @cpu: CPU that will execute the returned translation block
27
+
85
+ * @pc: guest PC
28
static inline Int128 int128_and(Int128 a, Int128 b)
86
+ * @cs_base: arch-specific value associated with translation block
29
{
87
+ * @flags: arch-specific translation block flags
30
return a & b;
88
+ * @cflags: CF_* flags
31
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
89
+ *
32
return a | b;
90
+ * Look up a translation block inside the QHT using @pc, @cs_base, @flags and
33
}
91
+ * @cflags. Uses @cpu's tb_jmp_cache. Might cause an exception, so have a
34
92
+ * longjmp destination ready.
35
+static inline Int128 int128_xor(Int128 a, Int128 b)
93
+ *
36
+{
94
+ * Returns: an existing translation block or NULL.
37
+ return a ^ b;
95
+ */
38
+}
96
static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
39
+
97
uint64_t cs_base, uint32_t flags,
40
static inline Int128 int128_rshift(Int128 a, int n)
98
uint32_t cflags)
41
{
42
return a >> n;
43
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
44
return int128_make128(a, (a < 0) ? -1 : 0);
45
}
46
47
+static inline Int128 int128_not(Int128 a)
48
+{
49
+ return int128_make128(~a.lo, ~a.hi);
50
+}
51
+
52
static inline Int128 int128_and(Int128 a, Int128 b)
53
{
54
return int128_make128(a.lo & b.lo, a.hi & b.hi);
55
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
56
return int128_make128(a.lo | b.lo, a.hi | b.hi);
57
}
58
59
+static inline Int128 int128_xor(Int128 a, Int128 b)
60
+{
61
+ return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
62
+}
63
+
64
static inline Int128 int128_rshift(Int128 a, int n)
65
{
66
int64_t h;
67
--
99
--
68
2.25.1
100
2.43.0
69
101
70
102
diff view generated by jsdifflib
1
For constant shifts, we can simply shift the s_mask.
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
2
2
3
For variable shifts, we know that sar does not reduce
3
Currently one-insn TBs created from I/O memory are not added to
4
the s_mask, which helps for sequences like
4
region_trees. Therefore, when they generate exceptions, they are not
5
handled by cpu_restore_state_from_tb().
5
6
6
ext32s_i64 t, in
7
For x86 this is not a problem, because x86_restore_state_to_opc() only
7
sar_i64 t, t, v
8
restores pc and cc, which already have the correct values if the first
8
ext32s_i64 out, t
9
TB instruction causes an exception. However, on several other
10
architectures, restore_state_to_opc() is not stricly limited to state
11
restoration and affects some exception-related registers, where guests
12
can notice incorrect values, for example:
9
13
10
allowing the final extend to be eliminated.
14
- arm's exception.syndrome;
15
- hppa's unwind_breg;
16
- riscv's excp_uw2;
17
- s390x's int_pgm_ilen.
11
18
19
Fix by always calling tcg_tb_insert(). This may increase the size of
20
region_trees, but tcg_region_reset_all() clears it once code_gen_buffer
21
fills up, so it will not grow uncontrollably.
22
23
Do not call tb_link_page(), which would add such TBs to the QHT, to
24
prevent tb_lookup() from finding them. These TBs are single-use, since
25
subsequent reads from I/O memory may return different values; they are
26
not removed from code_gen_buffer only in order to keep things simple.
27
28
Co-developed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
29
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
30
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
12
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
31
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
13
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
32
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
33
Message-ID: <20250116213214.5695-2-iii@linux.ibm.com>
15
---
34
---
16
tcg/optimize.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
35
accel/tcg/translate-all.c | 29 +++++++++++++++++++----------
17
1 file changed, 47 insertions(+), 3 deletions(-)
36
1 file changed, 19 insertions(+), 10 deletions(-)
18
37
19
diff --git a/tcg/optimize.c b/tcg/optimize.c
38
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
20
index XXXXXXX..XXXXXXX 100644
39
index XXXXXXX..XXXXXXX 100644
21
--- a/tcg/optimize.c
40
--- a/accel/tcg/translate-all.c
22
+++ b/tcg/optimize.c
41
+++ b/accel/tcg/translate-all.c
23
@@ -XXX,XX +XXX,XX @@ static uint64_t smask_from_zmask(uint64_t zmask)
42
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
24
return ~(~0ull >> rep);
43
tb_reset_jump(tb, 1);
25
}
26
27
+/*
28
+ * Recreate a properly left-aligned smask after manipulation.
29
+ * Some bit-shuffling, particularly shifts and rotates, may
30
+ * retain sign bits on the left, but may scatter disconnected
31
+ * sign bits on the right. Retain only what remains to the left.
32
+ */
33
+static uint64_t smask_from_smask(int64_t smask)
34
+{
35
+ /* Only the 1 bits are significant for smask */
36
+ return smask_from_zmask(~smask);
37
+}
38
+
39
static inline TempOptInfo *ts_info(TCGTemp *ts)
40
{
41
return ts->state_ptr;
42
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
43
44
static bool fold_shift(OptContext *ctx, TCGOp *op)
45
{
46
+ uint64_t s_mask, z_mask, sign;
47
+
48
if (fold_const2(ctx, op) ||
49
fold_ix_to_i(ctx, op, 0) ||
50
fold_xi_to_x(ctx, op, 0)) {
51
return true;
52
}
44
}
53
45
54
+ s_mask = arg_info(op->args[1])->s_mask;
46
- /*
55
+ z_mask = arg_info(op->args[1])->z_mask;
47
- * If the TB is not associated with a physical RAM page then it must be
56
+
48
- * a temporary one-insn TB, and we have nothing left to do. Return early
57
if (arg_is_const(op->args[2])) {
49
- * before attempting to link to other TBs or add to the lookup table.
58
- ctx->z_mask = do_constant_folding(op->opc, ctx->type,
50
- */
59
- arg_info(op->args[1])->z_mask,
51
- if (tb_page_addr0(tb) == -1) {
60
- arg_info(op->args[2])->val);
52
- assert_no_pages_locked();
61
+ int sh = arg_info(op->args[2])->val;
53
- return tb;
62
+
54
- }
63
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
55
-
64
+
56
/*
65
+ s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
57
* Insert TB into the corresponding region tree before publishing it
66
+ ctx->s_mask = smask_from_smask(s_mask);
58
* through QHT. Otherwise rewinding happened in the TB might fail to
67
+
59
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
68
return fold_masks(ctx, op);
60
*/
69
}
61
tcg_tb_insert(tb);
70
+
62
71
+ switch (op->opc) {
63
+ /*
72
+ CASE_OP_32_64(sar):
64
+ * If the TB is not associated with a physical RAM page then it must be
73
+ /*
65
+ * a temporary one-insn TB.
74
+ * Arithmetic right shift will not reduce the number of
66
+ *
75
+ * input sign repetitions.
67
+ * Such TBs must be added to region trees in order to make sure that
76
+ */
68
+ * restore_state_to_opc() - which on some architectures is not limited to
77
+ ctx->s_mask = s_mask;
69
+ * rewinding, but also affects exception handling! - is called when such a
78
+ break;
70
+ * TB causes an exception.
79
+ CASE_OP_32_64(shr):
71
+ *
80
+ /*
72
+ * At the same time, temporary one-insn TBs must be executed at most once,
81
+ * If the sign bit is known zero, then logical right shift
73
+ * because subsequent reads from, e.g., I/O memory may return different
82
+ * will not reduced the number of input sign repetitions.
74
+ * values. So return early before attempting to link to other TBs or add
83
+ */
75
+ * to the QHT.
84
+ sign = (s_mask & -s_mask) >> 1;
76
+ */
85
+ if (!(z_mask & sign)) {
77
+ if (tb_page_addr0(tb) == -1) {
86
+ ctx->s_mask = s_mask;
78
+ assert_no_pages_locked();
87
+ }
79
+ return tb;
88
+ break;
89
+ default:
90
+ break;
91
+ }
80
+ }
92
+
81
+
93
return false;
82
/*
94
}
83
* No explicit memory barrier is required -- tb_link_page() makes the
95
84
* TB visible in a consistent state.
96
--
85
--
97
2.25.1
86
2.43.0
98
87
99
88
diff view generated by jsdifflib
1
Move all of the known-zero optimizations into the per-opcode
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
functions. Use fold_masks when there is a possibility of the
3
result being determined, and simply set ctx->z_mask otherwise.
4
2
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
These helpers don't alter float_status. Make it const.
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-ID: <20250116214359.67295-1-philmd@linaro.org>
8
---
9
---
9
tcg/optimize.c | 545 ++++++++++++++++++++++++++-----------------------
10
include/fpu/softfloat-helpers.h | 25 ++++++++++++++-----------
10
1 file changed, 294 insertions(+), 251 deletions(-)
11
1 file changed, 14 insertions(+), 11 deletions(-)
11
12
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
--- a/include/fpu/softfloat-helpers.h
15
+++ b/tcg/optimize.c
16
+++ b/include/fpu/softfloat-helpers.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
17
@@ -XXX,XX +XXX,XX @@ static inline void set_no_signaling_nans(bool val, float_status *status)
17
TCGTempSet temps_used;
18
status->no_signaling_nans = val;
18
19
/* In flight values from optimization. */
20
- uint64_t z_mask;
21
+ uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
22
+ uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
23
TCGType type;
24
} OptContext;
25
26
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
27
return false;
28
}
19
}
29
20
30
+static bool fold_masks(OptContext *ctx, TCGOp *op)
21
-static inline bool get_float_detect_tininess(float_status *status)
31
+{
22
+static inline bool get_float_detect_tininess(const float_status *status)
32
+ uint64_t a_mask = ctx->a_mask;
33
+ uint64_t z_mask = ctx->z_mask;
34
+
35
+ /*
36
+ * 32-bit ops generate 32-bit results. For the result is zero test
37
+ * below, we can ignore high bits, but for further optimizations we
38
+ * need to record that the high bits contain garbage.
39
+ */
40
+ if (ctx->type == TCG_TYPE_I32) {
41
+ ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
42
+ a_mask &= MAKE_64BIT_MASK(0, 32);
43
+ z_mask &= MAKE_64BIT_MASK(0, 32);
44
+ }
45
+
46
+ if (z_mask == 0) {
47
+ return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
48
+ }
49
+ if (a_mask == 0) {
50
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
51
+ }
52
+ return false;
53
+}
54
+
55
/*
56
* Convert @op to NOT, if NOT is supported by the host.
57
* Return true f the conversion is successful, which will still
58
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
59
60
static bool fold_and(OptContext *ctx, TCGOp *op)
61
{
23
{
62
+ uint64_t z1, z2;
24
return status->tininess_before_rounding;
63
+
64
if (fold_const2(ctx, op) ||
65
fold_xi_to_i(ctx, op, 0) ||
66
fold_xi_to_x(ctx, op, -1) ||
67
fold_xx_to_x(ctx, op)) {
68
return true;
69
}
70
- return false;
71
+
72
+ z1 = arg_info(op->args[1])->z_mask;
73
+ z2 = arg_info(op->args[2])->z_mask;
74
+ ctx->z_mask = z1 & z2;
75
+
76
+ /*
77
+ * Known-zeros does not imply known-ones. Therefore unless
78
+ * arg2 is constant, we can't infer affected bits from it.
79
+ */
80
+ if (arg_is_const(op->args[2])) {
81
+ ctx->a_mask = z1 & ~z2;
82
+ }
83
+
84
+ return fold_masks(ctx, op);
85
}
25
}
86
26
87
static bool fold_andc(OptContext *ctx, TCGOp *op)
27
-static inline FloatRoundMode get_float_rounding_mode(float_status *status)
28
+static inline FloatRoundMode get_float_rounding_mode(const float_status *status)
88
{
29
{
89
+ uint64_t z1;
30
return status->float_rounding_mode;
90
+
91
if (fold_const2(ctx, op) ||
92
fold_xx_to_i(ctx, op, 0) ||
93
fold_xi_to_x(ctx, op, 0) ||
94
fold_ix_to_not(ctx, op, -1)) {
95
return true;
96
}
97
- return false;
98
+
99
+ z1 = arg_info(op->args[1])->z_mask;
100
+
101
+ /*
102
+ * Known-zeros does not imply known-ones. Therefore unless
103
+ * arg2 is constant, we can't infer anything from it.
104
+ */
105
+ if (arg_is_const(op->args[2])) {
106
+ uint64_t z2 = ~arg_info(op->args[2])->z_mask;
107
+ ctx->a_mask = z1 & ~z2;
108
+ z1 &= z2;
109
+ }
110
+ ctx->z_mask = z1;
111
+
112
+ return fold_masks(ctx, op);
113
}
31
}
114
32
115
static bool fold_brcond(OptContext *ctx, TCGOp *op)
33
-static inline int get_float_exception_flags(float_status *status)
116
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
34
+static inline int get_float_exception_flags(const float_status *status)
117
118
static bool fold_bswap(OptContext *ctx, TCGOp *op)
119
{
35
{
120
+ uint64_t z_mask, sign;
36
return status->float_exception_flags;
121
+
122
if (arg_is_const(op->args[1])) {
123
uint64_t t = arg_info(op->args[1])->val;
124
125
t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
126
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
127
}
128
- return false;
129
+
130
+ z_mask = arg_info(op->args[1])->z_mask;
131
+ switch (op->opc) {
132
+ case INDEX_op_bswap16_i32:
133
+ case INDEX_op_bswap16_i64:
134
+ z_mask = bswap16(z_mask);
135
+ sign = INT16_MIN;
136
+ break;
137
+ case INDEX_op_bswap32_i32:
138
+ case INDEX_op_bswap32_i64:
139
+ z_mask = bswap32(z_mask);
140
+ sign = INT32_MIN;
141
+ break;
142
+ case INDEX_op_bswap64_i64:
143
+ z_mask = bswap64(z_mask);
144
+ sign = INT64_MIN;
145
+ break;
146
+ default:
147
+ g_assert_not_reached();
148
+ }
149
+
150
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
151
+ case TCG_BSWAP_OZ:
152
+ break;
153
+ case TCG_BSWAP_OS:
154
+ /* If the sign bit may be 1, force all the bits above to 1. */
155
+ if (z_mask & sign) {
156
+ z_mask |= sign;
157
+ }
158
+ break;
159
+ default:
160
+ /* The high bits are undefined: force all bits above the sign to 1. */
161
+ z_mask |= sign << 1;
162
+ break;
163
+ }
164
+ ctx->z_mask = z_mask;
165
+
166
+ return fold_masks(ctx, op);
167
}
37
}
168
38
169
static bool fold_call(OptContext *ctx, TCGOp *op)
39
static inline FloatX80RoundPrec
170
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
40
-get_floatx80_rounding_precision(float_status *status)
171
41
+get_floatx80_rounding_precision(const float_status *status)
172
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
173
{
42
{
174
+ uint64_t z_mask;
43
return status->floatx80_rounding_precision;
175
+
176
if (arg_is_const(op->args[1])) {
177
uint64_t t = arg_info(op->args[1])->val;
178
179
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
180
}
181
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
182
}
183
+
184
+ switch (ctx->type) {
185
+ case TCG_TYPE_I32:
186
+ z_mask = 31;
187
+ break;
188
+ case TCG_TYPE_I64:
189
+ z_mask = 63;
190
+ break;
191
+ default:
192
+ g_assert_not_reached();
193
+ }
194
+ ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
195
+
196
return false;
197
}
44
}
198
45
199
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
46
-static inline Float2NaNPropRule get_float_2nan_prop_rule(float_status *status)
47
+static inline Float2NaNPropRule
48
+get_float_2nan_prop_rule(const float_status *status)
200
{
49
{
201
- return fold_const1(ctx, op);
50
return status->float_2nan_prop_rule;
202
+ if (fold_const1(ctx, op)) {
203
+ return true;
204
+ }
205
+
206
+ switch (ctx->type) {
207
+ case TCG_TYPE_I32:
208
+ ctx->z_mask = 32 | 31;
209
+ break;
210
+ case TCG_TYPE_I64:
211
+ ctx->z_mask = 64 | 63;
212
+ break;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ return false;
217
}
51
}
218
52
219
static bool fold_deposit(OptContext *ctx, TCGOp *op)
53
-static inline Float3NaNPropRule get_float_3nan_prop_rule(float_status *status)
220
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
54
+static inline Float3NaNPropRule
221
t1 = deposit64(t1, op->args[3], op->args[4], t2);
55
+get_float_3nan_prop_rule(const float_status *status)
222
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
56
{
223
}
57
return status->float_3nan_prop_rule;
224
+
225
+ ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
226
+ op->args[3], op->args[4],
227
+ arg_info(op->args[2])->z_mask);
228
return false;
229
}
58
}
230
59
231
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
60
-static inline FloatInfZeroNaNRule get_float_infzeronan_rule(float_status *status)
232
61
+static inline FloatInfZeroNaNRule
233
static bool fold_extract(OptContext *ctx, TCGOp *op)
62
+get_float_infzeronan_rule(const float_status *status)
234
{
63
{
235
+ uint64_t z_mask_old, z_mask;
64
return status->float_infzeronan_rule;
236
+
237
if (arg_is_const(op->args[1])) {
238
uint64_t t;
239
240
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
241
t = extract64(t, op->args[2], op->args[3]);
242
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
243
}
244
- return false;
245
+
246
+ z_mask_old = arg_info(op->args[1])->z_mask;
247
+ z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
248
+ if (op->args[2] == 0) {
249
+ ctx->a_mask = z_mask_old ^ z_mask;
250
+ }
251
+ ctx->z_mask = z_mask;
252
+
253
+ return fold_masks(ctx, op);
254
}
65
}
255
66
256
static bool fold_extract2(OptContext *ctx, TCGOp *op)
67
-static inline uint8_t get_float_default_nan_pattern(float_status *status)
257
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
68
+static inline uint8_t get_float_default_nan_pattern(const float_status *status)
258
259
static bool fold_exts(OptContext *ctx, TCGOp *op)
260
{
69
{
261
- return fold_const1(ctx, op);
70
return status->default_nan_pattern;
262
+ uint64_t z_mask_old, z_mask, sign;
263
+ bool type_change = false;
264
+
265
+ if (fold_const1(ctx, op)) {
266
+ return true;
267
+ }
268
+
269
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
270
+
271
+ switch (op->opc) {
272
+ CASE_OP_32_64(ext8s):
273
+ sign = INT8_MIN;
274
+ z_mask = (uint8_t)z_mask;
275
+ break;
276
+ CASE_OP_32_64(ext16s):
277
+ sign = INT16_MIN;
278
+ z_mask = (uint16_t)z_mask;
279
+ break;
280
+ case INDEX_op_ext_i32_i64:
281
+ type_change = true;
282
+ QEMU_FALLTHROUGH;
283
+ case INDEX_op_ext32s_i64:
284
+ sign = INT32_MIN;
285
+ z_mask = (uint32_t)z_mask;
286
+ break;
287
+ default:
288
+ g_assert_not_reached();
289
+ }
290
+
291
+ if (z_mask & sign) {
292
+ z_mask |= sign;
293
+ } else if (!type_change) {
294
+ ctx->a_mask = z_mask_old ^ z_mask;
295
+ }
296
+ ctx->z_mask = z_mask;
297
+
298
+ return fold_masks(ctx, op);
299
}
71
}
300
72
301
static bool fold_extu(OptContext *ctx, TCGOp *op)
73
-static inline bool get_flush_to_zero(float_status *status)
74
+static inline bool get_flush_to_zero(const float_status *status)
302
{
75
{
303
- return fold_const1(ctx, op);
76
return status->flush_to_zero;
304
+ uint64_t z_mask_old, z_mask;
305
+ bool type_change = false;
306
+
307
+ if (fold_const1(ctx, op)) {
308
+ return true;
309
+ }
310
+
311
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
312
+
313
+ switch (op->opc) {
314
+ CASE_OP_32_64(ext8u):
315
+ z_mask = (uint8_t)z_mask;
316
+ break;
317
+ CASE_OP_32_64(ext16u):
318
+ z_mask = (uint16_t)z_mask;
319
+ break;
320
+ case INDEX_op_extrl_i64_i32:
321
+ case INDEX_op_extu_i32_i64:
322
+ type_change = true;
323
+ QEMU_FALLTHROUGH;
324
+ case INDEX_op_ext32u_i64:
325
+ z_mask = (uint32_t)z_mask;
326
+ break;
327
+ case INDEX_op_extrh_i64_i32:
328
+ type_change = true;
329
+ z_mask >>= 32;
330
+ break;
331
+ default:
332
+ g_assert_not_reached();
333
+ }
334
+
335
+ ctx->z_mask = z_mask;
336
+ if (!type_change) {
337
+ ctx->a_mask = z_mask_old ^ z_mask;
338
+ }
339
+ return fold_masks(ctx, op);
340
}
77
}
341
78
342
static bool fold_mb(OptContext *ctx, TCGOp *op)
79
-static inline bool get_flush_inputs_to_zero(float_status *status)
343
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
80
+static inline bool get_flush_inputs_to_zero(const float_status *status)
344
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
345
}
346
347
+ ctx->z_mask = arg_info(op->args[3])->z_mask
348
+ | arg_info(op->args[4])->z_mask;
349
+
350
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
351
uint64_t tv = arg_info(op->args[3])->val;
352
uint64_t fv = arg_info(op->args[4])->val;
353
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
354
355
static bool fold_neg(OptContext *ctx, TCGOp *op)
356
{
81
{
357
+ uint64_t z_mask;
82
return status->flush_inputs_to_zero;
358
+
359
if (fold_const1(ctx, op)) {
360
return true;
361
}
362
+
363
+ /* Set to 1 all bits to the left of the rightmost. */
364
+ z_mask = arg_info(op->args[1])->z_mask;
365
+ ctx->z_mask = -(z_mask & -z_mask);
366
+
367
/*
368
* Because of fold_sub_to_neg, we want to always return true,
369
* via finish_folding.
370
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
371
fold_xx_to_x(ctx, op)) {
372
return true;
373
}
374
- return false;
375
+
376
+ ctx->z_mask = arg_info(op->args[1])->z_mask
377
+ | arg_info(op->args[2])->z_mask;
378
+ return fold_masks(ctx, op);
379
}
83
}
380
84
381
static bool fold_orc(OptContext *ctx, TCGOp *op)
85
-static inline bool get_default_nan_mode(float_status *status)
382
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
86
+static inline bool get_default_nan_mode(const float_status *status)
383
384
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
385
{
87
{
386
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
88
return status->default_nan_mode;
387
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
388
+ MemOp mop = get_memop(oi);
389
+ int width = 8 * memop_size(mop);
390
+
391
+ if (!(mop & MO_SIGN) && width < 64) {
392
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
393
+ }
394
+
395
/* Opcodes that touch guest memory stop the mb optimization. */
396
ctx->prev_mb = NULL;
397
return false;
398
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
399
if (i >= 0) {
400
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
401
}
402
+
403
+ ctx->z_mask = 1;
404
return false;
405
}
89
}
406
407
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
408
op->opc = INDEX_op_setcond_i32;
409
break;
410
}
411
+
412
+ ctx->z_mask = 1;
413
return false;
414
415
do_setcond_const:
416
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
417
418
static bool fold_sextract(OptContext *ctx, TCGOp *op)
419
{
420
+ int64_t z_mask_old, z_mask;
421
+
422
if (arg_is_const(op->args[1])) {
423
uint64_t t;
424
425
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
426
t = sextract64(t, op->args[2], op->args[3]);
427
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
428
}
429
- return false;
430
+
431
+ z_mask_old = arg_info(op->args[1])->z_mask;
432
+ z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
433
+ if (op->args[2] == 0 && z_mask >= 0) {
434
+ ctx->a_mask = z_mask_old ^ z_mask;
435
+ }
436
+ ctx->z_mask = z_mask;
437
+
438
+ return fold_masks(ctx, op);
439
}
440
441
static bool fold_shift(OptContext *ctx, TCGOp *op)
442
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
443
fold_xi_to_x(ctx, op, 0)) {
444
return true;
445
}
446
+
447
+ if (arg_is_const(op->args[2])) {
448
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type,
449
+ arg_info(op->args[1])->z_mask,
450
+ arg_info(op->args[2])->val);
451
+ return fold_masks(ctx, op);
452
+ }
453
return false;
454
}
455
456
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
457
return fold_addsub2_i32(ctx, op, false);
458
}
459
460
+static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
461
+{
462
+ /* We can't do any folding with a load, but we can record bits. */
463
+ switch (op->opc) {
464
+ CASE_OP_32_64(ld8u):
465
+ ctx->z_mask = MAKE_64BIT_MASK(0, 8);
466
+ break;
467
+ CASE_OP_32_64(ld16u):
468
+ ctx->z_mask = MAKE_64BIT_MASK(0, 16);
469
+ break;
470
+ case INDEX_op_ld32u_i64:
471
+ ctx->z_mask = MAKE_64BIT_MASK(0, 32);
472
+ break;
473
+ default:
474
+ g_assert_not_reached();
475
+ }
476
+ return false;
477
+}
478
+
479
static bool fold_xor(OptContext *ctx, TCGOp *op)
480
{
481
if (fold_const2(ctx, op) ||
482
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
483
fold_xi_to_not(ctx, op, -1)) {
484
return true;
485
}
486
- return false;
487
+
488
+ ctx->z_mask = arg_info(op->args[1])->z_mask
489
+ | arg_info(op->args[2])->z_mask;
490
+ return fold_masks(ctx, op);
491
}
492
493
/* Propagate constants and copies, fold constant expressions. */
494
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
495
}
496
497
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
498
- uint64_t z_mask, partmask, affected, tmp;
499
TCGOpcode opc = op->opc;
500
const TCGOpDef *def;
501
bool done = false;
502
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
503
break;
504
}
505
506
- /* Simplify using known-zero bits. Currently only ops with a single
507
- output argument is supported. */
508
- z_mask = -1;
509
- affected = -1;
510
- switch (opc) {
511
- CASE_OP_32_64(ext8s):
512
- if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
513
- break;
514
- }
515
- QEMU_FALLTHROUGH;
516
- CASE_OP_32_64(ext8u):
517
- z_mask = 0xff;
518
- goto and_const;
519
- CASE_OP_32_64(ext16s):
520
- if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
521
- break;
522
- }
523
- QEMU_FALLTHROUGH;
524
- CASE_OP_32_64(ext16u):
525
- z_mask = 0xffff;
526
- goto and_const;
527
- case INDEX_op_ext32s_i64:
528
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
529
- break;
530
- }
531
- QEMU_FALLTHROUGH;
532
- case INDEX_op_ext32u_i64:
533
- z_mask = 0xffffffffU;
534
- goto and_const;
535
-
536
- CASE_OP_32_64(and):
537
- z_mask = arg_info(op->args[2])->z_mask;
538
- if (arg_is_const(op->args[2])) {
539
- and_const:
540
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
541
- }
542
- z_mask = arg_info(op->args[1])->z_mask & z_mask;
543
- break;
544
-
545
- case INDEX_op_ext_i32_i64:
546
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
547
- break;
548
- }
549
- QEMU_FALLTHROUGH;
550
- case INDEX_op_extu_i32_i64:
551
- /* We do not compute affected as it is a size changing op. */
552
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
553
- break;
554
-
555
- CASE_OP_32_64(andc):
556
- /* Known-zeros does not imply known-ones. Therefore unless
557
- op->args[2] is constant, we can't infer anything from it. */
558
- if (arg_is_const(op->args[2])) {
559
- z_mask = ~arg_info(op->args[2])->z_mask;
560
- goto and_const;
561
- }
562
- /* But we certainly know nothing outside args[1] may be set. */
563
- z_mask = arg_info(op->args[1])->z_mask;
564
- break;
565
-
566
- case INDEX_op_sar_i32:
567
- if (arg_is_const(op->args[2])) {
568
- tmp = arg_info(op->args[2])->val & 31;
569
- z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
570
- }
571
- break;
572
- case INDEX_op_sar_i64:
573
- if (arg_is_const(op->args[2])) {
574
- tmp = arg_info(op->args[2])->val & 63;
575
- z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
576
- }
577
- break;
578
-
579
- case INDEX_op_shr_i32:
580
- if (arg_is_const(op->args[2])) {
581
- tmp = arg_info(op->args[2])->val & 31;
582
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
583
- }
584
- break;
585
- case INDEX_op_shr_i64:
586
- if (arg_is_const(op->args[2])) {
587
- tmp = arg_info(op->args[2])->val & 63;
588
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
589
- }
590
- break;
591
-
592
- case INDEX_op_extrl_i64_i32:
593
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
594
- break;
595
- case INDEX_op_extrh_i64_i32:
596
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
597
- break;
598
-
599
- CASE_OP_32_64(shl):
600
- if (arg_is_const(op->args[2])) {
601
- tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
602
- z_mask = arg_info(op->args[1])->z_mask << tmp;
603
- }
604
- break;
605
-
606
- CASE_OP_32_64(neg):
607
- /* Set to 1 all bits to the left of the rightmost. */
608
- z_mask = -(arg_info(op->args[1])->z_mask
609
- & -arg_info(op->args[1])->z_mask);
610
- break;
611
-
612
- CASE_OP_32_64(deposit):
613
- z_mask = deposit64(arg_info(op->args[1])->z_mask,
614
- op->args[3], op->args[4],
615
- arg_info(op->args[2])->z_mask);
616
- break;
617
-
618
- CASE_OP_32_64(extract):
619
- z_mask = extract64(arg_info(op->args[1])->z_mask,
620
- op->args[2], op->args[3]);
621
- if (op->args[2] == 0) {
622
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
623
- }
624
- break;
625
- CASE_OP_32_64(sextract):
626
- z_mask = sextract64(arg_info(op->args[1])->z_mask,
627
- op->args[2], op->args[3]);
628
- if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
629
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
630
- }
631
- break;
632
-
633
- CASE_OP_32_64(or):
634
- CASE_OP_32_64(xor):
635
- z_mask = arg_info(op->args[1])->z_mask
636
- | arg_info(op->args[2])->z_mask;
637
- break;
638
-
639
- case INDEX_op_clz_i32:
640
- case INDEX_op_ctz_i32:
641
- z_mask = arg_info(op->args[2])->z_mask | 31;
642
- break;
643
-
644
- case INDEX_op_clz_i64:
645
- case INDEX_op_ctz_i64:
646
- z_mask = arg_info(op->args[2])->z_mask | 63;
647
- break;
648
-
649
- case INDEX_op_ctpop_i32:
650
- z_mask = 32 | 31;
651
- break;
652
- case INDEX_op_ctpop_i64:
653
- z_mask = 64 | 63;
654
- break;
655
-
656
- CASE_OP_32_64(setcond):
657
- case INDEX_op_setcond2_i32:
658
- z_mask = 1;
659
- break;
660
-
661
- CASE_OP_32_64(movcond):
662
- z_mask = arg_info(op->args[3])->z_mask
663
- | arg_info(op->args[4])->z_mask;
664
- break;
665
-
666
- CASE_OP_32_64(ld8u):
667
- z_mask = 0xff;
668
- break;
669
- CASE_OP_32_64(ld16u):
670
- z_mask = 0xffff;
671
- break;
672
- case INDEX_op_ld32u_i64:
673
- z_mask = 0xffffffffu;
674
- break;
675
-
676
- CASE_OP_32_64(qemu_ld):
677
- {
678
- MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
679
- MemOp mop = get_memop(oi);
680
- if (!(mop & MO_SIGN)) {
681
- z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
682
- }
683
- }
684
- break;
685
-
686
- CASE_OP_32_64(bswap16):
687
- z_mask = arg_info(op->args[1])->z_mask;
688
- if (z_mask <= 0xffff) {
689
- op->args[2] |= TCG_BSWAP_IZ;
690
- }
691
- z_mask = bswap16(z_mask);
692
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
693
- case TCG_BSWAP_OZ:
694
- break;
695
- case TCG_BSWAP_OS:
696
- z_mask = (int16_t)z_mask;
697
- break;
698
- default: /* undefined high bits */
699
- z_mask |= MAKE_64BIT_MASK(16, 48);
700
- break;
701
- }
702
- break;
703
-
704
- case INDEX_op_bswap32_i64:
705
- z_mask = arg_info(op->args[1])->z_mask;
706
- if (z_mask <= 0xffffffffu) {
707
- op->args[2] |= TCG_BSWAP_IZ;
708
- }
709
- z_mask = bswap32(z_mask);
710
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
711
- case TCG_BSWAP_OZ:
712
- break;
713
- case TCG_BSWAP_OS:
714
- z_mask = (int32_t)z_mask;
715
- break;
716
- default: /* undefined high bits */
717
- z_mask |= MAKE_64BIT_MASK(32, 32);
718
- break;
719
- }
720
- break;
721
-
722
- default:
723
- break;
724
- }
725
-
726
- /* 32-bit ops generate 32-bit results. For the result is zero test
727
- below, we can ignore high bits, but for further optimizations we
728
- need to record that the high bits contain garbage. */
729
- partmask = z_mask;
730
- if (ctx.type == TCG_TYPE_I32) {
731
- z_mask |= ~(tcg_target_ulong)0xffffffffu;
732
- partmask &= 0xffffffffu;
733
- affected &= 0xffffffffu;
734
- }
735
- ctx.z_mask = z_mask;
736
-
737
- if (partmask == 0) {
738
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
739
- continue;
740
- }
741
- if (affected == 0) {
742
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
743
- continue;
744
- }
745
+ /* Assume all bits affected, and no bits known zero. */
746
+ ctx.a_mask = -1;
747
+ ctx.z_mask = -1;
748
749
/*
750
* Process each opcode.
751
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
752
case INDEX_op_extrh_i64_i32:
753
done = fold_extu(&ctx, op);
754
break;
755
+ CASE_OP_32_64(ld8u):
756
+ CASE_OP_32_64(ld16u):
757
+ case INDEX_op_ld32u_i64:
758
+ done = fold_tcg_ld(&ctx, op);
759
+ break;
760
case INDEX_op_mb:
761
done = fold_mb(&ctx, op);
762
break;
763
--
90
--
764
2.25.1
91
2.43.0
765
92
766
93
diff view generated by jsdifflib