Series comparison

-[PULL 00/11] tcg patch queue
+[PULL v2 00/15] tcg patch queue
-The following changes since commit 6eeea6725a70e6fcb5abba0764496bdab07ddfb3:
+Second try's the charm today, right?
-  Merge remote-tracking branch 'remotes/huth-gitlab/tags/pull-request-2020-10-06' into staging (2020-10-06 21:13:34 +0100)
 r~
 The following changes since commit 00b1faea41d283e931256aa78aa975a369ec3ae6:
   Merge tag 'pull-target-arm-20230123' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-01-23 13:40:28 +0000)
 are available in the Git repository at:
-  https://github.com/rth7680/qemu.git tags/pull-tcg-20201008
+  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230123
-for you to fetch changes up to 62475e9d007d83db4d0a6ccebcda8914f392e9c9:
+for you to fetch changes up to 709bcd7da3f6b4655d910634a0d520fa1439df38:
-  accel/tcg: Fix computing of is_write for MIPS (2020-10-08 05:57:32 -0500)
+  tcg/loongarch64: Reorg goto_tb implementation (2023-01-23 16:00:13 -1000)
 ----------------------------------------------------------------
-Extend maximum gvec vector size
+common-user: Re-enable ppc32 host
-Fix i386 avx2 dupi
+tcg: Avoid recursion in tcg_gen_mulu2_i32
-Fix mips host user-only write detection
+tcg: Mark tcg helpers noinline to avoid an issue with LTO
-Misc cleanups.
+tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
 disas: Enable loongarch disassembler, and fixes
 tcg/loongarch64: Improve move immediate
 tcg/loongarch64: Improve add immediate
 tcg/loongarch64: Improve setcond
 tcg/loongarch64: Implement movcond
 tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
 tcg/loongarch64: Reorg goto_tb implementation
 ----------------------------------------------------------------
-Kele Huang (1):
+Richard Henderson (14):
-      accel/tcg: Fix computing of is_write for MIPS
+      tcg: Avoid recursion in tcg_gen_mulu2_i32
       tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
       common-user/host/ppc: Implement safe-syscall.inc.S
       linux-user: Implment host/ppc/host-signal.h
       tcg: Mark tcg helpers noinline to avoid an issue with LTO
       target/loongarch: Enable the disassembler for host tcg
       target/loongarch: Disassemble jirl properly
       target/loongarch: Disassemble pcadd* addresses
       tcg/loongarch64: Update tcg-insn-defs.c.inc
       tcg/loongarch64: Introduce tcg_out_addi
       tcg/loongarch64: Improve setcond expansion
       tcg/loongarch64: Implement movcond
       tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
       tcg/loongarch64: Reorg goto_tb implementation
-Richard Henderson (10):
+Rui Wang (1):
-      tcg: Adjust simd_desc size encoding
+      tcg/loongarch64: Optimize immediate loading
       tcg: Drop union from TCGArgConstraint
       tcg: Move sorted_args into TCGArgConstraint.sort_index
       tcg: Remove TCG_CT_REG
       tcg: Move some TCG_CT_* bits to TCGArgConstraint bitfields
       tcg: Remove TCGOpDef.used
       tcg/i386: Fix dupi for avx2 32-bit hosts
       tcg: Fix generation of dupi_vec for 32-bit host
       tcg/optimize: Fold dup2_vec
       tcg: Remove TCG_TARGET_HAS_cmp_vec
- include/tcg/tcg-gvec-desc.h  | 38 ++++++++++++------
+ include/exec/helper-proto.h                    |  32 ++-
- include/tcg/tcg.h            | 22 ++++------
+ include/tcg/tcg.h                              |   7 -
- tcg/aarch64/tcg-target.h     |  1 -
+ linux-user/include/host/ppc/host-signal.h      |  39 +++
- tcg/i386/tcg-target.h        |  1 -
+ tcg/arm/tcg-target-con-set.h                   |   7 +-
- tcg/ppc/tcg-target.h         |  1 -
+ tcg/arm/tcg-target-con-str.h                   |   2 +
- accel/tcg/user-exec.c        | 43 ++++++++++++++++++--
+ tcg/loongarch64/tcg-target-con-set.h           |   5 +-
- tcg/optimize.c               | 15 +++++++
+ tcg/loongarch64/tcg-target-con-str.h           |   2 +-
- tcg/tcg-op-gvec.c            | 35 ++++++++++++----
+ tcg/loongarch64/tcg-target.h                   |  11 +-
- tcg/tcg-op-vec.c             | 12 ++++--
+ target/loongarch/insns.decode                  |   3 +-
- tcg/tcg.c                    | 96 +++++++++++++++++++-------------------------
+ disas.c                                        |   2 +
- tcg/aarch64/tcg-target.c.inc | 17 ++++----
+ target/loongarch/disas.c                       |  39 ++-
- tcg/arm/tcg-target.c.inc     | 29 ++++++-------
+ tcg/tcg-op.c                                   |   4 +-
- tcg/i386/tcg-target.c.inc    | 39 +++++++-----------
+ target/loongarch/insn_trans/trans_branch.c.inc |   2 +-
- tcg/mips/tcg-target.c.inc    | 21 +++++-----
+ tcg/arm/tcg-target.c.inc                       |  28 +-
- tcg/ppc/tcg-target.c.inc     | 29 ++++++-------
+ tcg/loongarch64/tcg-insn-defs.c.inc            |  10 +-
- tcg/riscv/tcg-target.c.inc   | 16 ++++----
+ tcg/loongarch64/tcg-target.c.inc               | 364 ++++++++++++++++---------
- tcg/s390/tcg-target.c.inc    | 22 +++++-----
+ common-user/host/ppc/safe-syscall.inc.S        | 107 ++++++++
- tcg/sparc/tcg-target.c.inc   | 21 ++++------
+ target/loongarch/meson.build                   |   3 +-
- tcg/tci/tcg-target.c.inc     |  3 +-
+files changed, 497 insertions(+), 170 deletions(-)
-files changed, 244 insertions(+), 217 deletions(-)
+ create mode 100644 linux-user/include/host/ppc/host-signal.h
+ create mode 100644 common-user/host/ppc/safe-syscall.inc.S

-[PULL 06/11] tcg: Remove TCGOpDef.used
+[PULL v2 01/15] tcg: Avoid recursion in tcg_gen_mulu2_i32
-The last user of this field disappeared in f69d277ece4.
+We have a test for one of TCG_TARGET_HAS_mulu2_i32 or
 TCG_TARGET_HAS_muluh_i32 being defined, but the test
 became non-functional when we changed to always define
 all of these macros.
-Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
+Replace this with a build-time test in tcg_gen_mulu2_i32.
 Fixes: 25c4d9cc845 ("tcg: Always define all of the TCGOpcode enum members.")
 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1435
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/tcg/tcg.h | 3 ---
+ include/tcg/tcg.h | 7 -------
-file changed, 3 deletions(-)
+ tcg/tcg-op.c      | 4 +++-
 files changed, 3 insertions(+), 8 deletions(-)
 diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
 index XXXXXXX..XXXXXXX 100644
 --- a/include/tcg/tcg.h
 +++ b/include/tcg/tcg.h
-@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
+@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
-     uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
+ #define TCG_TARGET_HAS_rem_i64          0
-     uint8_t flags;
+ #endif
-     TCGArgConstraint *args_ct;
--#if defined(CONFIG_DEBUG_TCG)
+-/* For 32-bit targets, some sort of unsigned widening multiply is required.  */
--    int used;
+-#if TCG_TARGET_REG_BITS == 32 \
 -    && !(defined(TCG_TARGET_HAS_mulu2_i32) \
 -         || defined(TCG_TARGET_HAS_muluh_i32))
 -# error "Missing unsigned widening multiply"
 -#endif
- } TCGOpDef;
+-
+ #if !defined(TCG_TARGET_HAS_v64) \
- extern TCGOpDef tcg_op_defs[];
+     && !defined(TCG_TARGET_HAS_v128) \
      && !defined(TCG_TARGET_HAS_v256)
 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/tcg-op.c
 +++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
          tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
          tcg_gen_mov_i32(rl, t);
          tcg_temp_free_i32(t);
 -    } else {
 +    } else if (TCG_TARGET_REG_BITS == 64) {
          TCGv_i64 t0 = tcg_temp_new_i64();
          TCGv_i64 t1 = tcg_temp_new_i64();
          tcg_gen_extu_i32_i64(t0, arg1);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
          tcg_gen_extr_i64_i32(rl, rh, t0);
          tcg_temp_free_i64(t0);
          tcg_temp_free_i64(t1);
 +    } else {
 +        qemu_build_not_reached();
      }
  }
 --
-.25.1
+.34.1

-[PULL 04/11] tcg: Remove TCG_CT_REG
+[PULL v2 02/15] tcg/arm: Use register pair allocation for qemu_{ld, st}_i64
-This wasn't actually used for anything, really.  All variable
+Although we still can't use ldrd and strd for all operations,
-operands must accept registers, and which are indicated by the
+increase the chances by getting the register allocation correct.
 set in TCGArgConstraint.regs.
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/tcg/tcg.h            |  1 -
+ tcg/arm/tcg-target-con-set.h |  7 ++++---
- tcg/tcg.c                    | 15 ++++-----------
+ tcg/arm/tcg-target-con-str.h |  2 ++
- tcg/aarch64/tcg-target.c.inc |  3 ---
+ tcg/arm/tcg-target.c.inc     | 28 ++++++++++++++++++----------
- tcg/arm/tcg-target.c.inc     |  3 ---
+files changed, 24 insertions(+), 13 deletions(-)
  tcg/i386/tcg-target.c.inc    | 11 -----------
  tcg/mips/tcg-target.c.inc    |  3 ---
  tcg/ppc/tcg-target.c.inc     |  5 -----
  tcg/riscv/tcg-target.c.inc   |  2 --
  tcg/s390/tcg-target.c.inc    |  4 ----
  tcg/sparc/tcg-target.c.inc   |  5 -----
  tcg/tci/tcg-target.c.inc     |  1 -
 files changed, 4 insertions(+), 49 deletions(-)
-diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
+diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
 index XXXXXXX..XXXXXXX 100644
---- a/include/tcg/tcg.h
+--- a/tcg/arm/tcg-target-con-set.h
-+++ b/include/tcg/tcg.h
++++ b/tcg/arm/tcg-target-con-set.h
-@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
+@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, rIN)
- #define TCG_CT_ALIAS  0x80
+ C_O0_I2(s, s)
- #define TCG_CT_IALIAS 0x40
+ C_O0_I2(w, r)
- #define TCG_CT_NEWREG 0x20 /* output requires a new register */
+ C_O0_I3(s, s, s)
--#define TCG_CT_REG    0x01
++C_O0_I3(S, p, s)
- #define TCG_CT_CONST  0x02 /* any constant of register size */
+ C_O0_I4(r, r, rI, rI)
+-C_O0_I4(s, s, s, s)
- typedef struct TCGArgConstraint {
++C_O0_I4(S, p, s, s)
-diff --git a/tcg/tcg.c b/tcg/tcg.c
+ C_O1_I1(r, l)
  C_O1_I1(r, r)
  C_O1_I1(w, r)
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wZ)
  C_O1_I3(w, w, w, w)
  C_O1_I4(r, r, r, rI, rI)
  C_O1_I4(r, r, rIN, rIK, 0)
 -C_O2_I1(r, r, l)
 -C_O2_I2(r, r, l, l)
 +C_O2_I1(e, p, l)
 +C_O2_I2(e, p, l, l)
  C_O2_I2(r, r, r, r)
  C_O2_I4(r, r, r, r, rIN, rIK)
  C_O2_I4(r, r, rI, rI, rIN, rIK)
 diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
 index XXXXXXX..XXXXXXX 100644
---- a/tcg/tcg.c
+--- a/tcg/arm/tcg-target-con-str.h
-+++ b/tcg/tcg.c
++++ b/tcg/arm/tcg-target-con-str.h
-@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
+@@ -XXX,XX +XXX,XX @@
- /* we give more priority to constraints with less registers */
+  * Define constraint letters for register sets:
- static int get_constraint_priority(const TCGOpDef *def, int k)
+  * REGS(letter, register_mask)
- {
+  */
--    const TCGArgConstraint *arg_ct;
++REGS('e', ALL_GENERAL_REGS & 0x5555) /* even regs */
-+    const TCGArgConstraint *arg_ct = &def->args_ct[k];
+ REGS('r', ALL_GENERAL_REGS)
-+    int n;
+ REGS('l', ALL_QLOAD_REGS)
+ REGS('s', ALL_QSTORE_REGS)
--    int i, n;
++REGS('S', ALL_QSTORE_REGS & 0x5555)  /* even qstore */
--    arg_ct = &def->args_ct[k];
+ REGS('w', ALL_VECTOR_REGS)
-     if (arg_ct->ct & TCG_CT_ALIAS) {
-         /* an alias is equivalent to a single register */
+ /*
          n = 1;
      } else {
 -        if (!(arg_ct->ct & TCG_CT_REG))
 -            return 0;
 -        n = 0;
 -        for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
 -            if (tcg_regset_test_reg(arg_ct->regs, i))
 -                n++;
 -        }
 +        n = ctpop64(arg_ct->regs);
      }
      return TCG_TARGET_NB_REGS - n + 1;
  }
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
                          int oarg = *ct_str - '0';
                          tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                          tcg_debug_assert(oarg < def->nb_oargs);
 -                        tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
 +                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                          /* TCG_CT_ALIAS is for the output arguments.
                             The input is tagged with TCG_CT_IALIAS. */
                          def->args_ct[i] = def->args_ct[oarg];
 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/aarch64/tcg-target.c.inc
 +++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
  {
      switch (*ct_str++) {
      case 'r': /* general registers */
 -        ct->ct |= TCG_CT_REG;
          ct->regs |= 0xffffffffu;
          break;
      case 'w': /* advsimd registers */
 -        ct->ct |= TCG_CT_REG;
          ct->regs |= 0xffffffff00000000ull;
          break;
      case 'l': /* qemu_ld / qemu_st address, data_reg */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffffu;
  #ifdef CONFIG_SOFTMMU
          /* x0 and x1 will be overwritten when reading the tlb entry,
 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/arm/tcg-target.c.inc
 +++ b/tcg/arm/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
          tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
          break;
+     case MO_UQ:
-     case 'r':
++        /* We used pair allocation for datalo, so already should be aligned. */
--        ct->ct |= TCG_CT_REG;
++        tcg_debug_assert((datalo & 1) == 0);
-         ct->regs = 0xffff;
++        tcg_debug_assert(datahi == datalo + 1);
          /* LDRD requires alignment; double-check that. */
 -        if (get_alignment_bits(opc) >= MO_64
 -            && (datalo & 1) == 0 && datahi == datalo + 1) {
 +        if (get_alignment_bits(opc) >= MO_64) {
              /*
               * Rm (the second address op) must not overlap Rt or Rt + 1.
               * Since datalo is aligned, we can simplify the test via alignment.
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
          tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
          break;
+     case MO_UQ:
-     /* qemu_ld address */
++        /* We used pair allocation for datalo, so already should be aligned. */
-     case 'l':
++        tcg_debug_assert((datalo & 1) == 0);
--        ct->ct |= TCG_CT_REG;
++        tcg_debug_assert(datahi == datalo + 1);
-         ct->regs = 0xffff;
+         /* LDRD requires alignment; double-check that. */
- #ifdef CONFIG_SOFTMMU
+-        if (get_alignment_bits(opc) >= MO_64
-         /* r0-r2,lr will be overwritten when reading the tlb entry,
+-            && (datalo & 1) == 0 && datahi == datalo + 1) {
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
++        if (get_alignment_bits(opc) >= MO_64) {
+             tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
-     /* qemu_st address & data */
+         } else if (datalo == addrlo) {
-     case 's':
+             tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
--        ct->ct |= TCG_CT_REG;
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
-         ct->regs = 0xffff;
+         tcg_out_st32_r(s, cond, datalo, addrlo, addend);
          /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
             and r0-r1 doing the byte swapping, so don't use these. */
 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/i386/tcg-target.c.inc
 +++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
  {
      switch(*ct_str++) {
      case 'a':
 -        ct->ct |= TCG_CT_REG;
          tcg_regset_set_reg(ct->regs, TCG_REG_EAX);
          break;
-     case 'b':
+     case MO_64:
--        ct->ct |= TCG_CT_REG;
++        /* We used pair allocation for datalo, so already should be aligned. */
-         tcg_regset_set_reg(ct->regs, TCG_REG_EBX);
++        tcg_debug_assert((datalo & 1) == 0);
 +        tcg_debug_assert(datahi == datalo + 1);
          /* STRD requires alignment; double-check that. */
 -        if (get_alignment_bits(opc) >= MO_64
 -            && (datalo & 1) == 0 && datahi == datalo + 1) {
 +        if (get_alignment_bits(opc) >= MO_64) {
              tcg_out_strd_r(s, cond, datalo, addrlo, addend);
          } else if (scratch_addend) {
              tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
          tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
          break;
-     case 'c':
+     case MO_64:
--        ct->ct |= TCG_CT_REG;
++        /* We used pair allocation for datalo, so already should be aligned. */
-         tcg_regset_set_reg(ct->regs, TCG_REG_ECX);
++        tcg_debug_assert((datalo & 1) == 0);
-         break;
++        tcg_debug_assert(datahi == datalo + 1);
-     case 'd':
+         /* STRD requires alignment; double-check that. */
--        ct->ct |= TCG_CT_REG;
+-        if (get_alignment_bits(opc) >= MO_64
-         tcg_regset_set_reg(ct->regs, TCG_REG_EDX);
+-            && (datalo & 1) == 0 && datahi == datalo + 1) {
-         break;
++        if (get_alignment_bits(opc) >= MO_64) {
-     case 'S':
+             tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
--        ct->ct |= TCG_CT_REG;
+         } else {
-         tcg_regset_set_reg(ct->regs, TCG_REG_ESI);
+             tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
-         break;
+@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
-     case 'D':
+     case INDEX_op_qemu_ld_i32:
--        ct->ct |= TCG_CT_REG;
+         return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
-         tcg_regset_set_reg(ct->regs, TCG_REG_EDI);
+     case INDEX_op_qemu_ld_i64:
-         break;
+-        return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l);
-     case 'q':
++        return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l);
-         /* A register that can be used as a byte operand.  */
+     case INDEX_op_qemu_st_i32:
--        ct->ct |= TCG_CT_REG;
+         return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
-         ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
+     case INDEX_op_qemu_st_i64:
-         break;
+-        return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
-     case 'Q':
++        return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s);
-         /* A register with an addressable second byte (e.g. %ah).  */
--        ct->ct |= TCG_CT_REG;
+     case INDEX_op_st_vec:
-         ct->regs = 0xf;
+         return C_O0_I2(w, r);
          break;
      case 'r':
          /* A general register.  */
 -        ct->ct |= TCG_CT_REG;
          ct->regs |= ALL_GENERAL_REGS;
          break;
      case 'W':
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
          break;
      case 'x':
          /* A vector register.  */
 -        ct->ct |= TCG_CT_REG;
          ct->regs |= ALL_VECTOR_REGS;
          break;
          /* qemu_ld/st address constraint */
      case 'L':
 -        ct->ct |= TCG_CT_REG;
          ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
          tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
          tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/mips/tcg-target.c.inc
 +++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
  {
      switch(*ct_str++) {
      case 'r':
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff;
          break;
      case 'L': /* qemu_ld input arg constraint */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff;
          tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
  #if defined(CONFIG_SOFTMMU)
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
  #endif
          break;
      case 'S': /* qemu_st constraint */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff;
          tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
  #if defined(CONFIG_SOFTMMU)
 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/ppc/tcg-target.c.inc
 +++ b/tcg/ppc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
  {
      switch (*ct_str++) {
      case 'A': case 'B': case 'C': case 'D':
 -        ct->ct |= TCG_CT_REG;
          tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
          break;
      case 'r':
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff;
          break;
      case 'v':
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff00000000ull;
          break;
      case 'L':                   /* qemu_ld constraint */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff;
          tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
  #ifdef CONFIG_SOFTMMU
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
  #endif
          break;
      case 'S':                   /* qemu_st constraint */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff;
          tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
  #ifdef CONFIG_SOFTMMU
 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/riscv/tcg-target.c.inc
 +++ b/tcg/riscv/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
  {
      switch (*ct_str++) {
      case 'r':
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff;
          break;
      case 'L':
          /* qemu_ld/qemu_st constraint */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff;
          /* qemu_ld/qemu_st uses TCG_REG_TMP0 */
  #if defined(CONFIG_SOFTMMU)
 diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/s390/tcg-target.c.inc
 +++ b/tcg/s390/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
  {
      switch (*ct_str++) {
      case 'r':                  /* all registers */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffff;
          break;
      case 'L':                  /* qemu_ld/st constraint */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffff;
          tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
          tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
          tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
          break;
      case 'a':                  /* force R2 for division */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0;
          tcg_regset_set_reg(ct->regs, TCG_REG_R2);
          break;
      case 'b':                  /* force R3 for division */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0;
          tcg_regset_set_reg(ct->regs, TCG_REG_R3);
          break;
 diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/sparc/tcg-target.c.inc
 +++ b/tcg/sparc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
  {
      switch (*ct_str++) {
      case 'r':
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff;
          break;
      case 'R':
 -        ct->ct |= TCG_CT_REG;
          ct->regs = ALL_64;
          break;
      case 'A': /* qemu_ld/st address constraint */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
      reserve_helpers:
          tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
          tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
          break;
      case 's': /* qemu_st data 32-bit constraint */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = 0xffffffff;
          goto reserve_helpers;
      case 'S': /* qemu_st data 64-bit constraint */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = ALL_64;
          goto reserve_helpers;
      case 'I':
 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/tci/tcg-target.c.inc
 +++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
      case 'r':
      case 'L':                   /* qemu_ld constraint */
      case 'S':                   /* qemu_st constraint */
 -        ct->ct |= TCG_CT_REG;
          ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
          break;
      default:
 --
-.25.1
+.34.1

-New patch
+[PULL v2 03/15] common-user/host/ppc: Implement safe-syscall.inc.S
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
+Message-Id: <20220729172141.1789105-2-richard.henderson@linaro.org>
+---
+ common-user/host/ppc/safe-syscall.inc.S | 107 ++++++++++++++++++++++++
+file changed, 107 insertions(+)
+ create mode 100644 common-user/host/ppc/safe-syscall.inc.S
+diff --git a/common-user/host/ppc/safe-syscall.inc.S b/common-user/host/ppc/safe-syscall.inc.S
+new file mode 100644
+index XXXXXXX..XXXXXXX
+--- /dev/null
++++ b/common-user/host/ppc/safe-syscall.inc.S
+@@ -XXX,XX +XXX,XX @@
++/*
++ * safe-syscall.inc.S : host-specific assembly fragment
++ * to handle signals occurring at the same time as system calls.
++ * This is intended to be included by common-user/safe-syscall.S
++ *
++ * Copyright (C) 2022 Linaro, Ltd.
++ *
++ * This work is licensed under the terms of the GNU GPL, version 2 or later.
++ * See the COPYING file in the top-level directory.
++ */
++
++/*
++ * Standardize on the _CALL_FOO symbols used by GCC:
++ * Apple XCode does not define _CALL_DARWIN.
++ * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
++ */
++#if !defined(_CALL_SYSV) && \
++    !defined(_CALL_DARWIN) && \
++    !defined(_CALL_AIX) && \
++    !defined(_CALL_ELF)
++# if defined(__APPLE__)
++#  define _CALL_DARWIN
++# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
++#  define _CALL_SYSV
++# else
++#  error "Unknown ABI"
++# endif
++#endif
++
++#ifndef _CALL_SYSV
++# error "Unsupported ABI"
++#endif
++
++
++        .global safe_syscall_base
++        .global safe_syscall_start
++        .global safe_syscall_end
++        .type   safe_syscall_base, @function
++
++        .text
++
++        /*
++         * This is the entry point for making a system call. The calling
++         * convention here is that of a C varargs function with the
++         * first argument an 'int *' to the signal_pending flag, the
++         * second one the system call number (as a 'long'), and all further
++         * arguments being syscall arguments (also 'long').
++         */
++safe_syscall_base:
++        .cfi_startproc
++        stwu    1, -8(1)
++        .cfi_def_cfa_offset 8
++        stw     30, 4(1)
++        .cfi_offset 30, -4
++
++        /*
++         * We enter with r3 == &signal_pending
++         *               r4 == syscall number
++         *               r5 ... r10 == syscall arguments
++         *               and return the result in r3
++         * and the syscall instruction needs
++         *               r0 == syscall number
++         *               r3 ... r8 == syscall arguments
++         *               and returns the result in r3
++         * Shuffle everything around appropriately.
++         */
++        mr      30, 3           /* signal_pending */
++        mr      0, 4            /* syscall number */
++        mr      3, 5            /* syscall arguments */
++        mr      4, 6
++        mr      5, 7
++        mr      6, 8
++        mr      7, 9
++        mr      8, 10
++
++        /*
++         * This next sequence of code works in conjunction with the
++         * rewind_if_safe_syscall_function(). If a signal is taken
++         * and the interrupted PC is anywhere between 'safe_syscall_start'
++         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
++         * The code sequence must therefore be able to cope with this, and
++         * the syscall instruction must be the final one in the sequence.
++         */
++safe_syscall_start:
++        /* if signal_pending is non-zero, don't do the call */
++        lwz     12, 0(30)
++        cmpwi   0, 12, 0
++        bne-    2f
++        sc
++safe_syscall_end:
++        /* code path when we did execute the syscall */
++        lwz     30, 4(1)        /* restore r30 */
++        addi    1, 1, 8         /* restore stack */
++        .cfi_restore 30
++        .cfi_def_cfa_offset 0
++        bnslr+                  /* return on success */
++        b       safe_syscall_set_errno_tail
++
++        /* code path when we didn't execute the syscall */
++2:      lwz     30, 4(1)
++        addi    1, 1, 8
++        addi    3, 0, QEMU_ERESTARTSYS
++        b       safe_syscall_set_errno_tail
++
++        .cfi_endproc
++
++        .size   safe_syscall_base, .-safe_syscall_base
+--
+.34.1

-New patch
+[PULL v2 04/15] linux-user: Implment host/ppc/host-signal.h
+This commit re-enables ppc32 as a linux-user host,
+as existence of the directory is noted by configure.
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1097
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
+Message-Id: <20220729172141.1789105-3-richard.henderson@linaro.org>
+---
+ linux-user/include/host/ppc/host-signal.h | 39 +++++++++++++++++++++++
+file changed, 39 insertions(+)
+ create mode 100644 linux-user/include/host/ppc/host-signal.h
+diff --git a/linux-user/include/host/ppc/host-signal.h b/linux-user/include/host/ppc/host-signal.h
+new file mode 100644
+index XXXXXXX..XXXXXXX
+--- /dev/null
++++ b/linux-user/include/host/ppc/host-signal.h
+@@ -XXX,XX +XXX,XX @@
++/*
++ * host-signal.h: signal info dependent on the host architecture
++ *
++ * Copyright (c) 2022 Linaro Ltd.
++ *
++ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
++ * See the COPYING file in the top-level directory.
++ */
++
++#ifndef PPC_HOST_SIGNAL_H
++#define PPC_HOST_SIGNAL_H
++
++#include <asm/ptrace.h>
++
++/* The third argument to a SA_SIGINFO handler is ucontext_t. */
++typedef ucontext_t host_sigcontext;
++
++static inline uintptr_t host_signal_pc(host_sigcontext *uc)
++{
++    return uc->uc_mcontext.regs->nip;
++}
++
++static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc)
++{
++    uc->uc_mcontext.regs->nip = pc;
++}
++
++static inline void *host_signal_mask(host_sigcontext *uc)
++{
++    return &uc->uc_sigmask;
++}
++
++static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc)
++{
++    return uc->uc_mcontext.regs->trap != 0x400
++        && (uc->uc_mcontext.regs->dsisr & 0x02000000);
++}
++
++#endif
+--
+.34.1

-[PULL 11/11] accel/tcg: Fix computing of is_write for MIPS
+[PULL v2 05/15] tcg: Mark tcg helpers noinline to avoid an issue with LTO
-From: Kele Huang <kele.hwang@gmail.com>
+Marking helpers __attribute__((noinline)) prevents an issue
 with GCC's ipa-split pass under --enable-lto.
-Detect all MIPS store instructions in cpu_signal_handler for all available
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1454
-MIPS versions, and set is_write if encountering such store instructions.
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Tested-by: Idan Horowitz <idan.horowitz@gmail.com>
 This fixed the error while dealing with self-modified code for MIPS.
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Signed-off-by: Kele Huang <kele.hwang@gmail.com>
 Signed-off-by: Xu Zou <iwatchnima@gmail.com>
 Message-Id: <20201002081420.10814-1-kele.hwang@gmail.com>
 [rth: Use uintptr_t for pc to fix n32 build error.]
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- accel/tcg/user-exec.c | 43 +++++++++++++++++++++++++++++++++++++++----
+ include/exec/helper-proto.h | 32 ++++++++++++++++++++++++--------
-file changed, 39 insertions(+), 4 deletions(-)
+file changed, 24 insertions(+), 8 deletions(-)
-diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
+diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h
 index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/user-exec.c
+--- a/include/exec/helper-proto.h
-+++ b/accel/tcg/user-exec.c
++++ b/include/exec/helper-proto.h
-@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
+@@ -XXX,XX +XXX,XX @@
- #elif defined(__mips__)
+ #include "exec/helper-head.h"
-+#if defined(__misp16) || defined(__mips_micromips)
++/*
-+#error "Unsupported encoding"
++ * Work around an issue with --enable-lto, in which GCC's ipa-split pass
-+#endif
++ * decides to split out the noreturn code paths that raise an exception,
 + * taking the __builtin_return_address() along into the new function,
 + * where it no longer computes a value that returns to TCG generated code.
 + * Despite the name, the noinline attribute affects the splitter, so this
 + * prevents the optimization in question.  Given that helpers should not
 + * otherwise be called directly, this should not have any other visible effect.
 + *
 + * See https://gitlab.com/qemu-project/qemu/-/issues/1454
 + */
 +#define DEF_HELPER_ATTR  __attribute__((noinline))
 +
- int cpu_signal_handler(int host_signum, void *pinfo,
+ #define DEF_HELPER_FLAGS_0(name, flags, ret) \
-                        void *puc)
+-dh_ctype(ret) HELPER(name) (void);
- {
++dh_ctype(ret) HELPER(name) (void) DEF_HELPER_ATTR;
-     siginfo_t *info = pinfo;
-     ucontext_t *uc = puc;
+ #define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
--    greg_t pc = uc->uc_mcontext.pc;
+-dh_ctype(ret) HELPER(name) (dh_ctype(t1));
--    int is_write;
++dh_ctype(ret) HELPER(name) (dh_ctype(t1)) DEF_HELPER_ATTR;
-+    uintptr_t pc = uc->uc_mcontext.pc;
-+    uint32_t insn = *(uint32_t *)pc;
+ #define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
-+    int is_write = 0;
+-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2));
-+
++dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)) DEF_HELPER_ATTR;
-+    /* Detect all store instructions at program counter. */
-+    switch((insn >> 26) & 077) {
+ #define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
-+    case 050: /* SB */
+-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3));
-+    case 051: /* SH */
++dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), \
-+    case 052: /* SWL */
++                            dh_ctype(t3)) DEF_HELPER_ATTR;
-+    case 053: /* SW */
-+    case 054: /* SDL */
+ #define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
-+    case 055: /* SDR */
+ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-+    case 056: /* SWR */
+-                                   dh_ctype(t4));
-+    case 070: /* SC */
++                            dh_ctype(t4)) DEF_HELPER_ATTR;
-+    case 071: /* SWC1 */
-+    case 074: /* SCD */
+ #define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \
-+    case 075: /* SDC1 */
+ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-+    case 077: /* SD */
+-                            dh_ctype(t4), dh_ctype(t5));
-+#if !defined(__mips_isa_rev) || __mips_isa_rev < 6
++                            dh_ctype(t4), dh_ctype(t5)) DEF_HELPER_ATTR;
-+    case 072: /* SWC2 */
-+    case 076: /* SDC2 */
+ #define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6) \
-+#endif
+ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-+        is_write = 1;
+-                            dh_ctype(t4), dh_ctype(t5), dh_ctype(t6));
-+        break;
++                            dh_ctype(t4), dh_ctype(t5), \
-+    case 023: /* COP1X */
++                            dh_ctype(t6)) DEF_HELPER_ATTR;
-+        /* Required in all versions of MIPS64 since
-+           MIPS64r1 and subsequent versions of MIPS32r2. */
+ #define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \
-+        switch (insn & 077) {
+ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-+        case 010: /* SWXC1 */
+                             dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
-+        case 011: /* SDXC1 */
+-                            dh_ctype(t7));
-+        case 015: /* SUXC1 */
++                            dh_ctype(t7)) DEF_HELPER_ATTR;
-+            is_write = 1;
-+        }
+ #define IN_HELPER_PROTO
-+        break;
-+    }
+@@ -XXX,XX +XXX,XX @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
+ #undef DEF_HELPER_FLAGS_5
--    /* XXX: compute is_write */
+ #undef DEF_HELPER_FLAGS_6
--    is_write = 0;
+ #undef DEF_HELPER_FLAGS_7
-     return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
++#undef DEF_HELPER_ATTR
- }
+ #endif /* HELPER_PROTO_H */
 --
-.25.1
+.34.1

-[PULL 10/11] tcg: Remove TCG_TARGET_HAS_cmp_vec
+[PULL v2 06/15] target/loongarch: Enable the disassembler for host tcg
-The cmp_vec opcode is mandatory; this symbol is unused.
+Reuse the decodetree based disassembler from
 target/loongarch/ for tcg/loongarch64/.
-Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
+The generation of decode-insns.c.inc into ./libcommon.fa.p/ could
 eventually result in conflict, if any other host requires the same
 trick, but this is good enough for now.
 Reviewed-by: WANG Xuerui <git@xen0n.name>
 Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- tcg/aarch64/tcg-target.h | 1 -
+ disas.c                      | 2 ++
- tcg/i386/tcg-target.h    | 1 -
+ target/loongarch/meson.build | 3 ++-
- tcg/ppc/tcg-target.h     | 1 -
+files changed, 4 insertions(+), 1 deletion(-)
 files changed, 3 deletions(-)
-diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
+diff --git a/disas.c b/disas.c
 index XXXXXXX..XXXXXXX 100644
---- a/tcg/aarch64/tcg-target.h
+--- a/disas.c
-+++ b/tcg/aarch64/tcg-target.h
++++ b/disas.c
-@@ -XXX,XX +XXX,XX @@ typedef enum {
+@@ -XXX,XX +XXX,XX @@ static void initialize_debug_host(CPUDebug *s)
- #define TCG_TARGET_HAS_shi_vec          1
+     s->info.cap_insn_split = 6;
- #define TCG_TARGET_HAS_shs_vec          0
+ #elif defined(__hppa__)
- #define TCG_TARGET_HAS_shv_vec          1
+     s->info.print_insn = print_insn_hppa;
--#define TCG_TARGET_HAS_cmp_vec          1
++#elif defined(__loongarch__)
- #define TCG_TARGET_HAS_mul_vec          1
++    s->info.print_insn = print_insn_loongarch;
- #define TCG_TARGET_HAS_sat_vec          1
+ #endif
- #define TCG_TARGET_HAS_minmax_vec       1
+ }
-diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
 diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build
 index XXXXXXX..XXXXXXX 100644
---- a/tcg/i386/tcg-target.h
+--- a/target/loongarch/meson.build
-+++ b/tcg/i386/tcg-target.h
++++ b/target/loongarch/meson.build
-@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
+@@ -XXX,XX +XXX,XX @@ gen = decodetree.process('insns.decode')
- #define TCG_TARGET_HAS_shi_vec          1
+ loongarch_ss = ss.source_set()
- #define TCG_TARGET_HAS_shs_vec          1
+ loongarch_ss.add(files(
- #define TCG_TARGET_HAS_shv_vec          have_avx2
+   'cpu.c',
--#define TCG_TARGET_HAS_cmp_vec          1
+-  'disas.c',
- #define TCG_TARGET_HAS_mul_vec          1
+ ))
- #define TCG_TARGET_HAS_sat_vec          1
+ loongarch_tcg_ss = ss.source_set()
- #define TCG_TARGET_HAS_minmax_vec       1
+ loongarch_tcg_ss.add(gen)
-diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
+@@ -XXX,XX +XXX,XX @@ loongarch_softmmu_ss.add(files(
-index XXXXXXX..XXXXXXX 100644
+   'iocsr_helper.c',
---- a/tcg/ppc/tcg-target.h
+ ))
-+++ b/tcg/ppc/tcg-target.h
-@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
++common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen])
- #define TCG_TARGET_HAS_shi_vec          0
++
- #define TCG_TARGET_HAS_shs_vec          0
+ loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss])
- #define TCG_TARGET_HAS_shv_vec          1
--#define TCG_TARGET_HAS_cmp_vec          1
+ target_arch += {'loongarch': loongarch_ss}
  #define TCG_TARGET_HAS_mul_vec          1
  #define TCG_TARGET_HAS_sat_vec          1
  #define TCG_TARGET_HAS_minmax_vec       1
 --
-.25.1
+.34.1

-New patch
+[PULL v2 07/15] target/loongarch: Disassemble jirl properly
+While jirl shares the same instruction format as bne etc,
+it is not assembled the same.  In particular, rd is printed
+first not second and the immediate is not pc-relative.
+Decode into the arg_rr_i structure, which prints correctly.
+This changes the "offs" member to "imm", so update translate to match.
+Reviewed-by: WANG Xuerui <git@xen0n.name>
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+---
+ target/loongarch/insns.decode                  | 3 ++-
+ target/loongarch/disas.c                       | 2 +-
+ target/loongarch/insn_trans/trans_branch.c.inc | 2 +-
+files changed, 4 insertions(+), 3 deletions(-)
+diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
+index XXXXXXX..XXXXXXX 100644
+--- a/target/loongarch/insns.decode
++++ b/target/loongarch/insns.decode
+@@ -XXX,XX +XXX,XX @@
+ @rr_ui12                 .... ...... imm:12 rj:5 rd:5    &rr_i
+ @rr_i14s2         .... ....  .............. rj:5 rd:5    &rr_i imm=%i14s2
+ @rr_i16                     .... .. imm:s16 rj:5 rd:5    &rr_i
++@rr_i16s2         .... ..  ................ rj:5 rd:5    &rr_i imm=%offs16
+ @hint_r_i12           .... ...... imm:s12 rj:5 hint:5    &hint_r_i
+ @rrr_sa2p1        .... ........ ... .. rk:5 rj:5 rd:5    &rrr_sa  sa=%sa2p1
+ @rrr_sa2        .... ........ ... sa:2 rk:5 rj:5 rd:5    &rrr_sa
+@@ -XXX,XX +XXX,XX @@ beqz            0100 00 ................ ..... .....     @r_offs21
+ bnez            0100 01 ................ ..... .....     @r_offs21
+ bceqz           0100 10 ................ 00 ... .....    @c_offs21
+ bcnez           0100 10 ................ 01 ... .....    @c_offs21
+-jirl            0100 11 ................ ..... .....     @rr_offs16
++jirl            0100 11 ................ ..... .....     @rr_i16s2
+ b               0101 00 ..........................       @offs26
+ bl              0101 01 ..........................       @offs26
+ beq             0101 10 ................ ..... .....     @rr_offs16
+diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
+index XXXXXXX..XXXXXXX 100644
+--- a/target/loongarch/disas.c
++++ b/target/loongarch/disas.c
+@@ -XXX,XX +XXX,XX @@ INSN(beqz,         r_offs)
+ INSN(bnez,         r_offs)
+ INSN(bceqz,        c_offs)
+ INSN(bcnez,        c_offs)
+-INSN(jirl,         rr_offs)
++INSN(jirl,         rr_i)
+ INSN(b,            offs)
+ INSN(bl,           offs)
+ INSN(beq,          rr_offs)
+diff --git a/target/loongarch/insn_trans/trans_branch.c.inc b/target/loongarch/insn_trans/trans_branch.c.inc
+index XXXXXXX..XXXXXXX 100644
+--- a/target/loongarch/insn_trans/trans_branch.c.inc
++++ b/target/loongarch/insn_trans/trans_branch.c.inc
+@@ -XXX,XX +XXX,XX @@ static bool trans_jirl(DisasContext *ctx, arg_jirl *a)
+     TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+     TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+-    tcg_gen_addi_tl(cpu_pc, src1, a->offs);
++    tcg_gen_addi_tl(cpu_pc, src1, a->imm);
+     tcg_gen_movi_tl(dest, ctx->base.pc_next + 4);
+     gen_set_gpr(a->rd, dest, EXT_NONE);
+     tcg_gen_lookup_and_goto_ptr();
+--
+.34.1

-New patch
+[PULL v2 08/15] target/loongarch: Disassemble pcadd* addresses
+Print both the raw field and the resolved pc-relative
+address, as we do for branches.
+Reviewed-by: WANG Xuerui <git@xen0n.name>
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+---
+ target/loongarch/disas.c | 37 +++++++++++++++++++++++++++++++++----
+file changed, 33 insertions(+), 4 deletions(-)
+diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
+index XXXXXXX..XXXXXXX 100644
+--- a/target/loongarch/disas.c
++++ b/target/loongarch/disas.c
+@@ -XXX,XX +XXX,XX @@ INSN(fsel,         fffc)
+ INSN(addu16i_d,    rr_i)
+ INSN(lu12i_w,      r_i)
+ INSN(lu32i_d,      r_i)
+-INSN(pcaddi,       r_i)
+-INSN(pcalau12i,    r_i)
+-INSN(pcaddu12i,    r_i)
+-INSN(pcaddu18i,    r_i)
+ INSN(ll_w,         rr_i)
+ INSN(sc_w,         rr_i)
+ INSN(ll_d,         rr_i)
+@@ -XXX,XX +XXX,XX @@ static bool trans_fcmp_cond_##suffix(DisasContext *ctx, \
+ FCMP_INSN(s)
+ FCMP_INSN(d)
++
++#define PCADD_INSN(name)                                        \
++static bool trans_##name(DisasContext *ctx, arg_##name *a)      \
++{                                                               \
++    output(ctx, #name, "r%d, %d # 0x%" PRIx64,                  \
++           a->rd, a->imm, gen_##name(ctx->pc, a->imm));         \
++    return true;                                                \
++}
++
++static uint64_t gen_pcaddi(uint64_t pc, int imm)
++{
++    return pc + (imm << 2);
++}
++
++static uint64_t gen_pcalau12i(uint64_t pc, int imm)
++{
++    return (pc + (imm << 12)) & ~0xfff;
++}
++
++static uint64_t gen_pcaddu12i(uint64_t pc, int imm)
++{
++    return pc + (imm << 12);
++}
++
++static uint64_t gen_pcaddu18i(uint64_t pc, int imm)
++{
++    return pc + ((uint64_t)(imm) << 18);
++}
++
++PCADD_INSN(pcaddi)
++PCADD_INSN(pcalau12i)
++PCADD_INSN(pcaddu12i)
++PCADD_INSN(pcaddu18i)
+--
+.34.1

-[PULL 03/11] tcg: Move sorted_args into TCGArgConstraint.sort_index
+[PULL v2 09/15] tcg/loongarch64: Optimize immediate loading
-This uses an existing hole in the TCGArgConstraint structure
+From: Rui Wang <wangrui@loongson.cn>
 and will be convenient for keeping the data in one place.
+diff:
+  Imm                 Before                  After
+  0000000000000000    addi.w  rd, zero, 0     addi.w  rd, zero, 0
+                      lu52i.d rd, zero, 0
+  00000000fffff800    lu12i.w rd, -1          addi.w  rd, zero, -2048
+                      ori     rd, rd, 2048    lu32i.d rd, 0
+                      lu32i.d rd, 0
+Reviewed-by: WANG Xuerui <git@xen0n.name>
+Signed-off-by: Rui Wang <wangrui@loongson.cn>
+Message-Id: <20221107144713.845550-1-wangrui@loongson.cn>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/tcg/tcg.h |  2 +-
+ tcg/loongarch64/tcg-target.c.inc | 35 +++++++++++---------------------
- tcg/tcg.c         | 35 +++++++++++++++++------------------
+file changed, 12 insertions(+), 23 deletions(-)
 files changed, 18 insertions(+), 19 deletions(-)
-diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
+diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
---- a/include/tcg/tcg.h
+--- a/tcg/loongarch64/tcg-target.c.inc
-+++ b/include/tcg/tcg.h
++++ b/tcg/loongarch64/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
+@@ -XXX,XX +XXX,XX @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
- typedef struct TCGArgConstraint {
+     return true;
-     uint16_t ct;
+ }
-     uint8_t alias_index;
-+    uint8_t sort_index;
+-static bool imm_part_needs_loading(bool high_bits_are_ones,
-     TCGRegSet regs;
+-                                   tcg_target_long part)
- } TCGArgConstraint;
+-{
+-    if (high_bits_are_ones) {
-@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
+-        return part != -1;
-     uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
+-    } else {
-     uint8_t flags;
+-        return part != 0;
-     TCGArgConstraint *args_ct;
+-    }
--    int *sorted_args;
+-}
- #if defined(CONFIG_DEBUG_TCG)
+-
-     int used;
+ /* Loads a 32-bit immediate into rd, sign-extended.  */
- #endif
+ static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
-diff --git a/tcg/tcg.c b/tcg/tcg.c
+ {
-index XXXXXXX..XXXXXXX 100644
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
---- a/tcg/tcg.c
+     tcg_target_long hi12 = sextreg(val, 12, 20);
-+++ b/tcg/tcg.c
-@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
+     /* Single-instruction cases.  */
-     int op, total_args, n, i;
+-    if (lo == val) {
-     TCGOpDef *def;
+-        /* val fits in simm12: addi.w rd, zero, val */
-     TCGArgConstraint *args_ct;
+-        tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
--    int *sorted_args;
+-        return;
-     TCGTemp *ts;
+-    }
+-    if (0x800 <= val && val <= 0xfff) {
-     memset(s, 0, sizeof(*s));
++    if (hi12 == 0) {
-@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
+         /* val fits in uimm12: ori rd, zero, val */
          tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val);
          return;
      }
++    if (hi12 == sextreg(lo, 12, 20)) {
-     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
++        /* val fits in simm12: addi.w rd, zero, val */
--    sorted_args = g_malloc(sizeof(int) * total_args);
++        tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
++        return;
-     for(op = 0; op < NB_OPS; op++) {
++    }
-         def = &tcg_op_defs[op];
-         def->args_ct = args_ct;
+     /* High bits must be set; load with lu12i.w + optional ori.  */
--        def->sorted_args = sorted_args;
+     tcg_out_opc_lu12i_w(s, rd, hi12);
-         n = def->nb_iargs + def->nb_oargs;
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
--        sorted_args += n;
-         args_ct += n;
+     intptr_t pc_offset;
      tcg_target_long val_lo, val_hi, pc_hi, offset_hi;
 -    tcg_target_long hi32, hi52;
 -    bool rd_high_bits_are_ones;
 +    tcg_target_long hi12, hi32, hi52;
      /* Value fits in signed i32.  */
      if (type == TCG_TYPE_I32 || val == (int32_t)val) {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
          return;
      }
-@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
++    hi12 = sextreg(val, 12, 20);
- /* sort from highest priority to lowest */
+     hi32 = sextreg(val, 32, 20);
- static void sort_constraints(TCGOpDef *def, int start, int n)
+     hi52 = sextreg(val, 52, 12);
- {
--    int i, j, p1, p2, tmp;
+     /* Single cu52i.d case.  */
-+    int i, j;
+-    if (ctz64(val) >= 52) {
-+    TCGArgConstraint *a = def->args_ct;
++    if ((hi52 != 0) && (ctz64(val) >= 52)) {
+         tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52);
 -    for(i = 0; i < n; i++)
 -        def->sorted_args[start + i] = start + i;
 -    if (n <= 1)
 +    for (i = 0; i < n; i++) {
 +        a[start + i].sort_index = start + i;
 +    }
 +    if (n <= 1) {
          return;
--    for(i = 0; i < n - 1; i++) {
--        for(j = i + 1; j < n; j++) {
--            p1 = get_constraint_priority(def, def->sorted_args[start + i]);
--            p2 = get_constraint_priority(def, def->sorted_args[start + j]);
-+    }
-+    for (i = 0; i < n - 1; i++) {
-+        for (j = i + 1; j < n; j++) {
-+            int p1 = get_constraint_priority(def, a[start + i].sort_index);
-+            int p2 = get_constraint_priority(def, a[start + j].sort_index);
-             if (p1 < p2) {
--                tmp = def->sorted_args[start + i];
--                def->sorted_args[start + i] = def->sorted_args[start + j];
--                def->sorted_args[start + j] = tmp;
-+                int tmp = a[start + i].sort_index;
-+                a[start + i].sort_index = a[start + j].sort_index;
-+                a[start + j].sort_index = tmp;
-             }
-         }
      }
-@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
-     for (k = 0; k < nb_iargs; k++) {
+     /* Slow path.  Initialize the low 32 bits, then concat high bits.  */
-         TCGRegSet i_preferred_regs, o_preferred_regs;
+     tcg_out_movi_i32(s, rd, val);
+-    rd_high_bits_are_ones = (int32_t)val < 0;
--        i = def->sorted_args[nb_oargs + k];
-+        i = def->args_ct[nb_oargs + k].sort_index;
+-    if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) {
-         arg = op->args[i];
++    /* Load hi32 and hi52 explicitly when they are unexpected values. */
-         arg_ct = &def->args_ct[i];
++    if (hi32 != sextreg(hi12, 20, 20)) {
-         ts = arg_temp(arg);
+         tcg_out_opc_cu32i_d(s, rd, hi32);
-@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
+-        rd_high_bits_are_ones = hi32 < 0;
-                     int k2, i2;
+     }
-                     reg = ts->reg;
-                     for (k2 = 0 ; k2 < k ; k2++) {
+-    if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) {
--                        i2 = def->sorted_args[nb_oargs + k2];
++    if (hi52 != sextreg(hi32, 20, 12)) {
-+                        i2 = def->args_ct[nb_oargs + k2].sort_index;
+         tcg_out_opc_cu52i_d(s, rd, rd, hi52);
-                         if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
+     }
-                             reg == new_args[i2]) {
+ }
                              goto allocate_in_reg;
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
          /* satisfy the output constraints */
          for(k = 0; k < nb_oargs; k++) {
 -            i = def->sorted_args[k];
 +            i = def->args_ct[k].sort_index;
              arg = op->args[i];
              arg_ct = &def->args_ct[i];
              ts = arg_temp(arg);
 --
-.25.1
+.34.1

-[PULL 09/11] tcg/optimize: Fold dup2_vec
+[PULL v2 10/15] tcg/loongarch64: Update tcg-insn-defs.c.inc
-When the two arguments are identical, this can be reduced to
+Regenerate with ADDU16I included:
 dup_vec or to mov_vec from a tcg_constant_vec.
+   $ cd loongarch-opcodes/scripts/go
+   $ go run ./genqemutcgdefs > $QEMU/tcg/loongarch64/tcg-insn-defs.c.inc
+Reviewed-by: WANG Xuerui <git@xen0n.name>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- tcg/optimize.c | 15 +++++++++++++++
+ tcg/loongarch64/tcg-insn-defs.c.inc | 10 +++++++++-
-file changed, 15 insertions(+)
+file changed, 9 insertions(+), 1 deletion(-)
-diff --git a/tcg/optimize.c b/tcg/optimize.c
+diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc b/tcg/loongarch64/tcg-insn-defs.c.inc
 index XXXXXXX..XXXXXXX 100644
---- a/tcg/optimize.c
+--- a/tcg/loongarch64/tcg-insn-defs.c.inc
-+++ b/tcg/optimize.c
++++ b/tcg/loongarch64/tcg-insn-defs.c.inc
-@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
+@@ -XXX,XX +XXX,XX @@
-             }
+  *
-             goto do_default;
+  * This file is auto-generated by genqemutcgdefs from
+  * https://github.com/loongson-community/loongarch-opcodes,
-+        case INDEX_op_dup2_vec:
+- * from commit 961f0c60f5b63e574d785995600c71ad5413fdc4.
-+            assert(TCG_TARGET_REG_BITS == 32);
++ * from commit 25ca7effe9d88101c1cf96c4005423643386d81f.
-+            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
+  * DO NOT EDIT.
-+                tmp = arg_info(op->args[1])->val;
+  */
-+                if (tmp == arg_info(op->args[2])->val) {
-+                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
+@@ -XXX,XX +XXX,XX @@ typedef enum {
-+                    break;
+     OPC_ANDI = 0x03400000,
-+                }
+     OPC_ORI = 0x03800000,
-+            } else if (args_are_copies(op->args[1], op->args[2])) {
+     OPC_XORI = 0x03c00000,
-+                op->opc = INDEX_op_dup_vec;
++    OPC_ADDU16I_D = 0x10000000,
-+                TCGOP_VECE(op) = MO_32;
+     OPC_LU12I_W = 0x14000000,
-+                nb_iargs = 1;
+     OPC_CU32I_D = 0x16000000,
-+            }
+     OPC_PCADDU2I = 0x18000000,
-+            goto do_default;
+@@ -XXX,XX +XXX,XX @@ tcg_out_opc_xori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12)
      tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12));
  }
 +/* Emits the `addu16i.d d, j, sk16` instruction.  */
 +static void __attribute__((unused))
 +tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
 +{
 +    tcg_out32(s, encode_djsk16_insn(OPC_ADDU16I_D, d, j, sk16));
 +}
 +
-         CASE_OP_32_64(not):
+ /* Emits the `lu12i.w d, sj20` instruction.  */
-         CASE_OP_32_64(neg):
+ static void __attribute__((unused))
-         CASE_OP_32_64(ext8s):
+ tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20)
 --
-.25.1
+.34.1

-[PULL 05/11] tcg: Move some TCG_CT_* bits to TCGArgConstraint bitfields
+[PULL v2 11/15] tcg/loongarch64: Introduce tcg_out_addi
-These are easier to set and test when they have their own fields.
+Adjust the constraints to allow any int32_t for immediate
-Reduce the size of alias_index and sort_index to 4 bits, which is
+addition.  Split immediate adds into addu16i + addi, which
-sufficient for TCG_MAX_OP_ARGS.  This leaves only the bits indicating
+covers quite a lot of the immediate space.  For the hole in
-constants within the ct field.
+the middle, load the constant into TMP0 instead.
-Move all initialization to allocation time, rather than init
+Reviewed-by: WANG Xuerui <git@xen0n.name>
 individual fields in process_op_defs.
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/tcg/tcg.h | 14 +++++++-------
+ tcg/loongarch64/tcg-target-con-set.h |  4 +-
- tcg/tcg.c         | 28 ++++++++++++----------------
+ tcg/loongarch64/tcg-target-con-str.h |  2 +-
-files changed, 19 insertions(+), 23 deletions(-)
+ tcg/loongarch64/tcg-target.c.inc     | 57 ++++++++++++++++++++++++----
 files changed, 53 insertions(+), 10 deletions(-)
-diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
+diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
 index XXXXXXX..XXXXXXX 100644
---- a/include/tcg/tcg.h
+--- a/tcg/loongarch64/tcg-target-con-set.h
-+++ b/include/tcg/tcg.h
++++ b/tcg/loongarch64/tcg-target-con-set.h
-@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void);
+@@ -XXX,XX +XXX,XX @@ C_O1_I1(r, L)
- void tcg_dump_info(void);
+ C_O1_I2(r, r, rC)
- void tcg_dump_op_count(void);
+ C_O1_I2(r, r, ri)
+ C_O1_I2(r, r, rI)
--#define TCG_CT_ALIAS  0x80
++C_O1_I2(r, r, rJ)
--#define TCG_CT_IALIAS 0x40
+ C_O1_I2(r, r, rU)
--#define TCG_CT_NEWREG 0x20 /* output requires a new register */
+ C_O1_I2(r, r, rW)
--#define TCG_CT_CONST  0x02 /* any constant of register size */
+ C_O1_I2(r, r, rZ)
-+#define TCG_CT_CONST  1 /* any constant of register size */
+ C_O1_I2(r, 0, rZ)
+-C_O1_I2(r, rZ, rN)
- typedef struct TCGArgConstraint {
++C_O1_I2(r, rZ, ri)
--    uint16_t ct;
++C_O1_I2(r, rZ, rJ)
--    uint8_t alias_index;
+ C_O1_I2(r, rZ, rZ)
--    uint8_t sort_index;
+diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h
 +    unsigned ct : 16;
 +    unsigned alias_index : 4;
 +    unsigned sort_index : 4;
 +    bool oalias : 1;
 +    bool ialias : 1;
 +    bool newreg : 1;
      TCGRegSet regs;
  } TCGArgConstraint;
 diff --git a/tcg/tcg.c b/tcg/tcg.c
 index XXXXXXX..XXXXXXX 100644
---- a/tcg/tcg.c
+--- a/tcg/loongarch64/tcg-target-con-str.h
-+++ b/tcg/tcg.c
++++ b/tcg/loongarch64/tcg-target-con-str.h
-@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
+@@ -XXX,XX +XXX,XX @@ REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
-         total_args += n;
+  * CONST(letter, TCG_CT_CONST_* bit set)
   */
  CONST('I', TCG_CT_CONST_S12)
 -CONST('N', TCG_CT_CONST_N12)
 +CONST('J', TCG_CT_CONST_S32)
  CONST('U', TCG_CT_CONST_U12)
  CONST('Z', TCG_CT_CONST_ZERO)
  CONST('C', TCG_CT_CONST_C12)
 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/loongarch64/tcg-target.c.inc
 +++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_oarg_regs[] = {
  #define TCG_CT_CONST_ZERO  0x100
  #define TCG_CT_CONST_S12   0x200
 -#define TCG_CT_CONST_N12   0x400
 +#define TCG_CT_CONST_S32   0x400
  #define TCG_CT_CONST_U12   0x800
  #define TCG_CT_CONST_C12   0x1000
  #define TCG_CT_CONST_WSZ   0x2000
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
      if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) {
          return true;
      }
+-    if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) {
--    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
++    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
-+    args_ct = g_new0(TCGArgConstraint, total_args);
+         return true;
+     }
-     for(op = 0; op < NB_OPS; op++) {
+     if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) {
-         def = &tcg_op_defs[op];
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
-@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
+     }
-     const TCGArgConstraint *arg_ct = &def->args_ct[k];
+ }
-     int n;
++static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd,
--    if (arg_ct->ct & TCG_CT_ALIAS) {
++                         TCGReg rs, tcg_target_long imm)
-+    if (arg_ct->oalias) {
++{
-         /* an alias is equivalent to a single register */
++    tcg_target_long lo12 = sextreg(imm, 0, 12);
-         n = 1;
++    tcg_target_long hi16 = sextreg(imm - lo12, 16, 16);
-     } else {
++
-@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
++    /*
-             /* Incomplete TCGTargetOpDef entry. */
++     * Note that there's a hole in between hi16 and lo12:
-             tcg_debug_assert(ct_str != NULL);
++     *
++     *       3                   2                   1                   0
--            def->args_ct[i].regs = 0;
++     *     1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
--            def->args_ct[i].ct = 0;
++     * ...+-------------------------------+-------+-----------------------+
-             while (*ct_str != '\0') {
++     *    |             hi16              |       |          lo12         |
-                 switch(*ct_str) {
++     * ...+-------------------------------+-------+-----------------------+
-                 case '0' ... '9':
++     *
-@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
++     * For bits within that hole, it's more efficient to use LU12I and ADD.
-                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
++     */
-                         tcg_debug_assert(oarg < def->nb_oargs);
++    if (imm == (hi16 << 16) + lo12) {
-                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
++        if (hi16) {
--                        /* TCG_CT_ALIAS is for the output arguments.
++            tcg_out_opc_addu16i_d(s, rd, rs, hi16);
--                           The input is tagged with TCG_CT_IALIAS. */
++            rs = rd;
-                         def->args_ct[i] = def->args_ct[oarg];
++        }
--                        def->args_ct[oarg].ct |= TCG_CT_ALIAS;
++        if (type == TCG_TYPE_I32) {
-+                        /* The output sets oalias.  */
++            tcg_out_opc_addi_w(s, rd, rs, lo12);
-+                        def->args_ct[oarg].oalias = true;
++        } else if (lo12) {
-                         def->args_ct[oarg].alias_index = i;
++            tcg_out_opc_addi_d(s, rd, rs, lo12);
--                        def->args_ct[i].ct |= TCG_CT_IALIAS;
++        } else {
-+                        /* The input sets ialias. */
++            tcg_out_mov(s, type, rd, rs);
-+                        def->args_ct[i].ialias = true;
++        }
-                         def->args_ct[i].alias_index = oarg;
++    } else {
-                     }
++        tcg_out_movi(s, type, TCG_REG_TMP0, imm);
-                     ct_str++;
++        if (type == TCG_TYPE_I32) {
-                     break;
++            tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0);
-                 case '&':
++        } else {
--                    def->args_ct[i].ct |= TCG_CT_NEWREG;
++            tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0);
-+                    def->args_ct[i].newreg = true;
++        }
-                     ct_str++;
++    }
-                     break;
++}
-                 case 'i':
++
-@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
+ static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
-                     set = *pset;
+ {
+     tcg_out_opc_andi(s, ret, arg, 0xff);
-                     set &= ct->regs;
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
--                    if (ct->ct & TCG_CT_IALIAS) {
-+                    if (ct->ialias) {
+     case INDEX_op_add_i32:
-                         set &= op->output_pref[ct->alias_index];
+         if (c2) {
-                     }
+-            tcg_out_opc_addi_w(s, a0, a1, a2);
-                     /* If the combination is not possible, restart.  */
++            tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2);
-@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
+         } else {
              tcg_out_opc_add_w(s, a0, a1, a2);
          }
+         break;
-         i_preferred_regs = o_preferred_regs = 0;
+     case INDEX_op_add_i64:
--        if (arg_ct->ct & TCG_CT_IALIAS) {
+         if (c2) {
-+        if (arg_ct->ialias) {
+-            tcg_out_opc_addi_d(s, a0, a1, a2);
-             o_preferred_regs = op->output_pref[arg_ct->alias_index];
++            tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2);
-             if (ts->fixed_reg) {
+         } else {
-                 /* if fixed register, we must allocate a new register
+             tcg_out_opc_add_d(s, a0, a1, a2);
-@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
+         }
-                     reg = ts->reg;
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
-                     for (k2 = 0 ; k2 < k ; k2++) {
-                         i2 = def->args_ct[nb_oargs + k2].sort_index;
+     case INDEX_op_sub_i32:
--                        if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
+         if (c2) {
--                            reg == new_args[i2]) {
+-            tcg_out_opc_addi_w(s, a0, a1, -a2);
-+                        if (def->args_ct[i2].ialias && reg == new_args[i2]) {
++            tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2);
-                             goto allocate_in_reg;
+         } else {
-                         }
+             tcg_out_opc_sub_w(s, a0, a1, a2);
-                     }
+         }
-@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
+         break;
-             /* ENV should not be modified.  */
+     case INDEX_op_sub_i64:
-             tcg_debug_assert(!ts->fixed_reg);
+         if (c2) {
+-            tcg_out_opc_addi_d(s, a0, a1, -a2);
--            if ((arg_ct->ct & TCG_CT_ALIAS)
++            tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2);
--                && !const_args[arg_ct->alias_index]) {
+         } else {
-+            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
+             tcg_out_opc_sub_d(s, a0, a1, a2);
-                 reg = new_args[arg_ct->alias_index];
+         }
--            } else if (arg_ct->ct & TCG_CT_NEWREG) {
+@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
-+            } else if (arg_ct->newreg) {
+         return C_O1_I2(r, r, ri);
-                 reg = tcg_reg_alloc(s, arg_ct->regs,
-                                     i_allocated_regs | o_allocated_regs,
+     case INDEX_op_add_i32:
-                                     op->output_pref[k], ts->indirect_base);
++        return C_O1_I2(r, r, ri);
      case INDEX_op_add_i64:
 -        return C_O1_I2(r, r, rI);
 +        return C_O1_I2(r, r, rJ);
      case INDEX_op_and_i32:
      case INDEX_op_and_i64:
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
          return C_O1_I2(r, 0, rZ);
      case INDEX_op_sub_i32:
 +        return C_O1_I2(r, rZ, ri);
      case INDEX_op_sub_i64:
 -        return C_O1_I2(r, rZ, rN);
 +        return C_O1_I2(r, rZ, rJ);
      case INDEX_op_mul_i32:
      case INDEX_op_mul_i64:
 --
-2.25.1
+2.34.1

-[PULL 08/11] tcg: Fix generation of dupi_vec for 32-bit host
+[PULL v2 12/15] tcg/loongarch64: Improve setcond expansion
-The definition of INDEX_op_dupi_vec is that it operates on
+Split out a helper function, tcg_out_setcond_int, which
-units of tcg_target_ulong -- in this case 32 bits.  It does
+does not always produce the complete boolean result, but
-not work to use this for a uint64_t value that happens to be
+returns a set of flags to do so.
 small enough to fit in tcg_target_ulong.
-Fixes: d2fd745fe8b
+Accept all int32_t as constant input, so that LE/GT can
-Fixes: db432672dc5
+adjust the constant to LT.
-Cc: qemu-stable@nongnu.org
 Reviewed-by: WANG Xuerui <git@xen0n.name>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- tcg/tcg-op-vec.c | 12 ++++++++----
+ tcg/loongarch64/tcg-target.c.inc | 165 +++++++++++++++++++++----------
- 1 file changed, 8 insertions(+), 4 deletions(-)
+ 1 file changed, 115 insertions(+), 50 deletions(-)
-diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
+diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
---- a/tcg/tcg-op-vec.c
+--- a/tcg/loongarch64/tcg-target.c.inc
-+++ b/tcg/tcg-op-vec.c
++++ b/tcg/loongarch64/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
+     tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
- void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
+ }
- {
--    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
+-static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
--        do_dupi_vec(r, MO_32, a);
+-                            TCGReg arg1, TCGReg arg2, bool c2)
--    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
+-{
-+    if (TCG_TARGET_REG_BITS == 64) {
+-    TCGReg tmp;
-         do_dupi_vec(r, MO_64, a);
++#define SETCOND_INV    TCG_TARGET_NB_REGS
-+    } else if (a == dup_const(MO_32, a)) {
++#define SETCOND_NEZ    (SETCOND_INV << 1)
-+        do_dupi_vec(r, MO_32, a);
++#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
-     } else {
-         TCGv_i64 c = tcg_const_i64(a);
+-    if (c2) {
-         tcg_gen_dup_i64_vec(MO_64, r, c);
+-        tcg_debug_assert(arg2 == 0);
-@@ -XXX,XX +XXX,XX @@ void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
++static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
++                               TCGReg arg1, tcg_target_long arg2, bool c2)
- void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
++{
- {
++    int flags = 0;
--    do_dupi_vec(r, MO_REG, dup_const(vece, a));
++
-+    if (vece == MO_64) {
++    switch (cond) {
-+        tcg_gen_dup64i_vec(r, a);
++    case TCG_COND_EQ:    /* -> NE  */
-+    } else {
++    case TCG_COND_GE:    /* -> LT  */
-+        do_dupi_vec(r, MO_REG, dup_const(vece, a));
++    case TCG_COND_GEU:   /* -> LTU */
 +    case TCG_COND_GT:    /* -> LE  */
 +    case TCG_COND_GTU:   /* -> LEU */
 +        cond = tcg_invert_cond(cond);
 +        flags ^= SETCOND_INV;
 +        break;
 +    default:
 +        break;
      }
      switch (cond) {
 -    case TCG_COND_EQ:
 -        if (c2) {
 -            tmp = arg1;
 -        } else {
 -            tcg_out_opc_sub_d(s, ret, arg1, arg2);
 -            tmp = ret;
 -        }
 -        tcg_out_opc_sltui(s, ret, tmp, 1);
 -        break;
 -    case TCG_COND_NE:
 -        if (c2) {
 -            tmp = arg1;
 -        } else {
 -            tcg_out_opc_sub_d(s, ret, arg1, arg2);
 -            tmp = ret;
 -        }
 -        tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp);
 -        break;
 -    case TCG_COND_LT:
 -        tcg_out_opc_slt(s, ret, arg1, arg2);
 -        break;
 -    case TCG_COND_GE:
 -        tcg_out_opc_slt(s, ret, arg1, arg2);
 -        tcg_out_opc_xori(s, ret, ret, 1);
 -        break;
      case TCG_COND_LE:
 -        tcg_out_setcond(s, TCG_COND_GE, ret, arg2, arg1, false);
 -        break;
 -    case TCG_COND_GT:
 -        tcg_out_setcond(s, TCG_COND_LT, ret, arg2, arg1, false);
 -        break;
 -    case TCG_COND_LTU:
 -        tcg_out_opc_sltu(s, ret, arg1, arg2);
 -        break;
 -    case TCG_COND_GEU:
 -        tcg_out_opc_sltu(s, ret, arg1, arg2);
 -        tcg_out_opc_xori(s, ret, ret, 1);
 -        break;
      case TCG_COND_LEU:
 -        tcg_out_setcond(s, TCG_COND_GEU, ret, arg2, arg1, false);
 +        /*
 +         * If we have a constant input, the most efficient way to implement
 +         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
 +         * We don't need to care for this for LE because the constant input
 +         * is still constrained to int32_t, and INT32_MAX+1 is representable
 +         * in the 64-bit temporary register.
 +         */
 +        if (c2) {
 +            if (cond == TCG_COND_LEU) {
 +                /* unsigned <= -1 is true */
 +                if (arg2 == -1) {
 +                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
 +                    return ret;
 +                }
 +                cond = TCG_COND_LTU;
 +            } else {
 +                cond = TCG_COND_LT;
 +            }
 +            arg2 += 1;
 +        } else {
 +            TCGReg tmp = arg2;
 +            arg2 = arg1;
 +            arg1 = tmp;
 +            cond = tcg_swap_cond(cond);    /* LE -> GE */
 +            cond = tcg_invert_cond(cond);  /* GE -> LT */
 +            flags ^= SETCOND_INV;
 +        }
          break;
 -    case TCG_COND_GTU:
 -        tcg_out_setcond(s, TCG_COND_LTU, ret, arg2, arg1, false);
 +    default:
          break;
 +    }
 +
 +    switch (cond) {
 +    case TCG_COND_NE:
 +        flags |= SETCOND_NEZ;
 +        if (!c2) {
 +            tcg_out_opc_xor(s, ret, arg1, arg2);
 +        } else if (arg2 == 0) {
 +            ret = arg1;
 +        } else if (arg2 >= 0 && arg2 <= 0xfff) {
 +            tcg_out_opc_xori(s, ret, arg1, arg2);
 +        } else {
 +            tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2);
 +        }
 +        break;
 +
 +    case TCG_COND_LT:
 +    case TCG_COND_LTU:
 +        if (c2) {
 +            if (arg2 >= -0x800 && arg2 <= 0x7ff) {
 +                if (cond == TCG_COND_LT) {
 +                    tcg_out_opc_slti(s, ret, arg1, arg2);
 +                } else {
 +                    tcg_out_opc_sltui(s, ret, arg1, arg2);
 +                }
 +                break;
 +            }
 +            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
 +            arg2 = TCG_REG_TMP0;
 +        }
 +        if (cond == TCG_COND_LT) {
 +            tcg_out_opc_slt(s, ret, arg1, arg2);
 +        } else {
 +            tcg_out_opc_sltu(s, ret, arg1, arg2);
 +        }
 +        break;
 +
      default:
          g_assert_not_reached();
          break;
      }
 +
 +    return ret | flags;
 +}
 +
 +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
 +                            TCGReg arg1, tcg_target_long arg2, bool c2)
 +{
 +    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
 +
 +    if (tmpflags != ret) {
 +        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
 +
 +        switch (tmpflags & SETCOND_FLAGS) {
 +        case SETCOND_INV:
 +            /* Intermediate result is boolean: simply invert. */
 +            tcg_out_opc_xori(s, ret, tmp, 1);
 +            break;
 +        case SETCOND_NEZ:
 +            /* Intermediate result is zero/non-zero: test != 0. */
 +            tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp);
 +            break;
 +        case SETCOND_NEZ | SETCOND_INV:
 +            /* Intermediate result is zero/non-zero: test == 0. */
 +            tcg_out_opc_sltui(s, ret, tmp, 1);
 +            break;
 +        default:
 +            g_assert_not_reached();
 +        }
 +    }
  }
- void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
+ /*
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
      case INDEX_op_ctz_i64:
          return C_O1_I2(r, r, rW);
 -    case INDEX_op_setcond_i32:
 -    case INDEX_op_setcond_i64:
 -        return C_O1_I2(r, r, rZ);
 -
      case INDEX_op_deposit_i32:
      case INDEX_op_deposit_i64:
          /* Must deposit into the same register as input */
          return C_O1_I2(r, 0, rZ);
      case INDEX_op_sub_i32:
 +    case INDEX_op_setcond_i32:
          return C_O1_I2(r, rZ, ri);
      case INDEX_op_sub_i64:
 +    case INDEX_op_setcond_i64:
          return C_O1_I2(r, rZ, rJ);
      case INDEX_op_mul_i32:
 --
-2.25.1
+2.34.1

-[PULL 02/11] tcg: Drop union from TCGArgConstraint
+[PULL v2 13/15] tcg/loongarch64: Implement movcond
-The union is unused; let "regs" appear in the main structure
+Reviewed-by: WANG Xuerui <git@xen0n.name>
 without the "u.regs" wrapping.
 Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/tcg/tcg.h            |  4 +---
+ tcg/loongarch64/tcg-target-con-set.h |  1 +
- tcg/tcg.c                    | 22 +++++++++++-----------
+ tcg/loongarch64/tcg-target.h         |  4 ++--
- tcg/aarch64/tcg-target.c.inc | 14 +++++++-------
+ tcg/loongarch64/tcg-target.c.inc     | 33 ++++++++++++++++++++++++++++
- tcg/arm/tcg-target.c.inc     | 26 +++++++++++++-------------
+ 3 files changed, 36 insertions(+), 2 deletions(-)
  tcg/i386/tcg-target.c.inc    | 26 +++++++++++++-------------
  tcg/mips/tcg-target.c.inc    | 18 +++++++++---------
  tcg/ppc/tcg-target.c.inc     | 24 ++++++++++++------------
  tcg/riscv/tcg-target.c.inc   | 14 +++++++-------
  tcg/s390/tcg-target.c.inc    | 18 +++++++++---------
  tcg/sparc/tcg-target.c.inc   | 16 ++++++++--------
  tcg/tci/tcg-target.c.inc     |  2 +-
 files changed, 91 insertions(+), 93 deletions(-)
-diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
+diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
 index XXXXXXX..XXXXXXX 100644
---- a/include/tcg/tcg.h
+--- a/tcg/loongarch64/tcg-target-con-set.h
-+++ b/include/tcg/tcg.h
++++ b/tcg/loongarch64/tcg-target-con-set.h
-@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
+@@ -XXX,XX +XXX,XX @@ C_O1_I2(r, 0, rZ)
- typedef struct TCGArgConstraint {
+ C_O1_I2(r, rZ, ri)
-     uint16_t ct;
+ C_O1_I2(r, rZ, rJ)
-     uint8_t alias_index;
+ C_O1_I2(r, rZ, rZ)
--    union {
++C_O1_I4(r, rZ, rJ, rZ, rZ)
--        TCGRegSet regs;
+diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
 -    } u;
 +    TCGRegSet regs;
  } TCGArgConstraint;
  #define TCG_MAX_OP_ARGS 16
 diff --git a/tcg/tcg.c b/tcg/tcg.c
 index XXXXXXX..XXXXXXX 100644
---- a/tcg/tcg.c
+--- a/tcg/loongarch64/tcg-target.h
-+++ b/tcg/tcg.c
++++ b/tcg/loongarch64/tcg-target.h
-@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
+@@ -XXX,XX +XXX,XX @@ typedef enum {
-             return 0;
+ #define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
-         n = 0;
-         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
+ /* optional instructions */
--            if (tcg_regset_test_reg(arg_ct->u.regs, i))
+-#define TCG_TARGET_HAS_movcond_i32      0
-+            if (tcg_regset_test_reg(arg_ct->regs, i))
++#define TCG_TARGET_HAS_movcond_i32      1
-                 n++;
+ #define TCG_TARGET_HAS_div_i32          1
-         }
+ #define TCG_TARGET_HAS_rem_i32          1
  #define TCG_TARGET_HAS_div2_i32         0
@@ -XXX,XX +XXX,XX @@ typedef enum {
  #define TCG_TARGET_HAS_qemu_st8_i32     0
  /* 64-bit operations */
 -#define TCG_TARGET_HAS_movcond_i64      0
 +#define TCG_TARGET_HAS_movcond_i64      1
  #define TCG_TARGET_HAS_div_i64          1
  #define TCG_TARGET_HAS_rem_i64          1
  #define TCG_TARGET_HAS_div2_i64         0
 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/loongarch64/tcg-target.c.inc
 +++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
      }
-@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
+ }
-             /* Incomplete TCGTargetOpDef entry. */
-             tcg_debug_assert(ct_str != NULL);
++static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
++                            TCGReg c1, tcg_target_long c2, bool const2,
--            def->args_ct[i].u.regs = 0;
++                            TCGReg v1, TCGReg v2)
-+            def->args_ct[i].regs = 0;
++{
-             def->args_ct[i].ct = 0;
++    int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2);
-             while (*ct_str != '\0') {
++    TCGReg t;
-                 switch(*ct_str) {
++
-@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
++    /* Standardize the test below to t != 0. */
-                     pset = la_temp_pref(ts);
++    if (tmpflags & SETCOND_INV) {
-                     set = *pset;
++        t = v1, v1 = v2, v2 = t;
++    }
--                    set &= ct->u.regs;
++
-+                    set &= ct->regs;
++    t = tmpflags & ~SETCOND_FLAGS;
-                     if (ct->ct & TCG_CT_IALIAS) {
++    if (v1 == TCG_REG_ZERO) {
-                         set &= op->output_pref[ct->alias_index];
++        tcg_out_opc_masknez(s, ret, v2, t);
-                     }
++    } else if (v2 == TCG_REG_ZERO) {
-                     /* If the combination is not possible, restart.  */
++        tcg_out_opc_maskeqz(s, ret, v1, t);
-                     if (set == 0) {
++    } else {
--                        set = ct->u.regs;
++        tcg_out_opc_masknez(s, TCG_REG_TMP2, v2, t); /* t ? 0 : v2 */
-+                        set = ct->regs;
++        tcg_out_opc_maskeqz(s, TCG_REG_TMP1, v1, t); /* t ? v1 : 0 */
-                     }
++        tcg_out_opc_or(s, ret, TCG_REG_TMP1, TCG_REG_TMP2);
-                     *pset = set;
++    }
-                 }
++}
-@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
++
-         return;
+ /*
   * Branch helpers
   */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
          tcg_out_setcond(s, args[3], a0, a1, a2, c2);
          break;
 +    case INDEX_op_movcond_i32:
 +    case INDEX_op_movcond_i64:
 +        tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]);
 +        break;
 +
      case INDEX_op_ld8s_i32:
      case INDEX_op_ld8s_i64:
          tcg_out_ldst(s, OPC_LD_B, a0, a1, a2);
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
      case INDEX_op_remu_i64:
          return C_O1_I2(r, rZ, rZ);
 +    case INDEX_op_movcond_i32:
 +    case INDEX_op_movcond_i64:
 +        return C_O1_I4(r, rZ, rJ, rZ, rZ);
 +
      default:
          g_assert_not_reached();
      }
--    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
--    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
-+    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
-+    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
-     /* Allocate the output register now.  */
-     if (ots->val_type != TEMP_VAL_REG) {
-@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
-             }
-         }
--        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
-+        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
-         reg = ts->reg;
--        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
-+        if (tcg_regset_test_reg(arg_ct->regs, reg)) {
-             /* nothing to do : the constraint is satisfied */
-         } else {
-         allocate_in_reg:
-@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
-                and move the temporary register into it */
-             temp_load(s, ts, tcg_target_available_regs[ts->type],
-                       i_allocated_regs, 0);
--            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
-+            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
-                                 o_preferred_regs, ts->indirect_base);
-             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
-                 /*
-@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
-                 && !const_args[arg_ct->alias_index]) {
-                 reg = new_args[arg_ct->alias_index];
-             } else if (arg_ct->ct & TCG_CT_NEWREG) {
--                reg = tcg_reg_alloc(s, arg_ct->u.regs,
-+                reg = tcg_reg_alloc(s, arg_ct->regs,
-                                     i_allocated_regs | o_allocated_regs,
-                                     op->output_pref[k], ts->indirect_base);
-             } else {
--                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
-+                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
-                                     op->output_pref[k], ts->indirect_base);
-             }
-             tcg_regset_set_reg(o_allocated_regs, reg);
-diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/aarch64/tcg-target.c.inc
-+++ b/tcg/aarch64/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
-     switch (*ct_str++) {
-     case 'r': /* general registers */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs |= 0xffffffffu;
-+        ct->regs |= 0xffffffffu;
-         break;
-     case 'w': /* advsimd registers */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs |= 0xffffffff00000000ull;
-+        ct->regs |= 0xffffffff00000000ull;
-         break;
-     case 'l': /* qemu_ld / qemu_st address, data_reg */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffffu;
-+        ct->regs = 0xffffffffu;
- #ifdef CONFIG_SOFTMMU
-         /* x0 and x1 will be overwritten when reading the tlb entry,
-            and x2, and x3 for helper args, better to avoid using them. */
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_X0);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_X1);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_X2);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_X3);
- #endif
-         break;
-     case 'A': /* Valid for arithmetic immediate (positive or negative).  */
-diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/arm/tcg-target.c.inc
-+++ b/tcg/arm/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
-     case 'r':
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffff;
-+        ct->regs = 0xffff;
-         break;
-     /* qemu_ld address */
-     case 'l':
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffff;
-+        ct->regs = 0xffff;
- #ifdef CONFIG_SOFTMMU
-         /* r0-r2,lr will be overwritten when reading the tlb entry,
-            so don't use these. */
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
- #endif
-         break;
-     /* qemu_st address & data */
-     case 's':
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffff;
-+        ct->regs = 0xffff;
-         /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
-            and r0-r1 doing the byte swapping, so don't use these. */
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
- #if defined(CONFIG_SOFTMMU)
-         /* Avoid clashes with registers being used for helper args */
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
- #if TARGET_LONG_BITS == 64
-         /* Avoid clashes with registers being used for helper args */
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
- #endif
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
- #endif
-         break;
-diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/i386/tcg-target.c.inc
-+++ b/tcg/i386/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
-     switch(*ct_str++) {
-     case 'a':
-         ct->ct |= TCG_CT_REG;
--        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
-+        tcg_regset_set_reg(ct->regs, TCG_REG_EAX);
-         break;
-     case 'b':
-         ct->ct |= TCG_CT_REG;
--        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
-+        tcg_regset_set_reg(ct->regs, TCG_REG_EBX);
-         break;
-     case 'c':
-         ct->ct |= TCG_CT_REG;
--        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
-+        tcg_regset_set_reg(ct->regs, TCG_REG_ECX);
-         break;
-     case 'd':
-         ct->ct |= TCG_CT_REG;
--        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
-+        tcg_regset_set_reg(ct->regs, TCG_REG_EDX);
-         break;
-     case 'S':
-         ct->ct |= TCG_CT_REG;
--        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
-+        tcg_regset_set_reg(ct->regs, TCG_REG_ESI);
-         break;
-     case 'D':
-         ct->ct |= TCG_CT_REG;
--        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
-+        tcg_regset_set_reg(ct->regs, TCG_REG_EDI);
-         break;
-     case 'q':
-         /* A register that can be used as a byte operand.  */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
-+        ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
-         break;
-     case 'Q':
-         /* A register with an addressable second byte (e.g. %ah).  */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xf;
-+        ct->regs = 0xf;
-         break;
-     case 'r':
-         /* A general register.  */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs |= ALL_GENERAL_REGS;
-+        ct->regs |= ALL_GENERAL_REGS;
-         break;
-     case 'W':
-         /* With TZCNT/LZCNT, we can have operand-size as an input.  */
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
-     case 'x':
-         /* A vector register.  */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs |= ALL_VECTOR_REGS;
-+        ct->regs |= ALL_VECTOR_REGS;
-         break;
-         /* qemu_ld/st address constraint */
-     case 'L':
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
-+        ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
-         break;
-     case 'e':
-diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/mips/tcg-target.c.inc
-+++ b/tcg/mips/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
-     switch(*ct_str++) {
-     case 'r':
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff;
-+        ct->regs = 0xffffffff;
-         break;
-     case 'L': /* qemu_ld input arg constraint */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff;
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
-+        ct->regs = 0xffffffff;
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
- #if defined(CONFIG_SOFTMMU)
-         if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
--            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
-+            tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
-         }
- #endif
-         break;
-     case 'S': /* qemu_st constraint */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff;
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
-+        ct->regs = 0xffffffff;
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
- #if defined(CONFIG_SOFTMMU)
-         if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
--            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
--            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
-+            tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
-+            tcg_regset_reset_reg(ct->regs, TCG_REG_A3);
-         } else {
--            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
-+            tcg_regset_reset_reg(ct->regs, TCG_REG_A1);
-         }
- #endif
-         break;
-diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/ppc/tcg-target.c.inc
-+++ b/tcg/ppc/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
-     switch (*ct_str++) {
-     case 'A': case 'B': case 'C': case 'D':
-         ct->ct |= TCG_CT_REG;
--        tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
-+        tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
-         break;
-     case 'r':
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff;
-+        ct->regs = 0xffffffff;
-         break;
-     case 'v':
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff00000000ull;
-+        ct->regs = 0xffffffff00000000ull;
-         break;
-     case 'L':                   /* qemu_ld constraint */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff;
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
-+        ct->regs = 0xffffffff;
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
- #ifdef CONFIG_SOFTMMU
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
- #endif
-         break;
-     case 'S':                   /* qemu_st constraint */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff;
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
-+        ct->regs = 0xffffffff;
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
- #ifdef CONFIG_SOFTMMU
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R6);
- #endif
-         break;
-     case 'I':
-diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/riscv/tcg-target.c.inc
-+++ b/tcg/riscv/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
-     switch (*ct_str++) {
-     case 'r':
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff;
-+        ct->regs = 0xffffffff;
-         break;
-     case 'L':
-         /* qemu_ld/qemu_st constraint */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff;
-+        ct->regs = 0xffffffff;
-         /* qemu_ld/qemu_st uses TCG_REG_TMP0 */
- #if defined(CONFIG_SOFTMMU)
--        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
--        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
--        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
--        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[3]);
--        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[4]);
-+        tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[0]);
-+        tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[1]);
-+        tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[2]);
-+        tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[3]);
-+        tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[4]);
- #endif
-         break;
-     case 'I':
-diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/s390/tcg-target.c.inc
-+++ b/tcg/s390/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
-     switch (*ct_str++) {
-     case 'r':                  /* all registers */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffff;
-+        ct->regs = 0xffff;
-         break;
-     case 'L':                  /* qemu_ld/st constraint */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffff;
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
-+        ct->regs = 0xffff;
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
-         break;
-     case 'a':                  /* force R2 for division */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0;
--        tcg_regset_set_reg(ct->u.regs, TCG_REG_R2);
-+        ct->regs = 0;
-+        tcg_regset_set_reg(ct->regs, TCG_REG_R2);
-         break;
-     case 'b':                  /* force R3 for division */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0;
--        tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
-+        ct->regs = 0;
-+        tcg_regset_set_reg(ct->regs, TCG_REG_R3);
-         break;
-     case 'A':
-         ct->ct |= TCG_CT_CONST_S33;
-diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/sparc/tcg-target.c.inc
-+++ b/tcg/sparc/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
-     switch (*ct_str++) {
-     case 'r':
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff;
-+        ct->regs = 0xffffffff;
-         break;
-     case 'R':
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = ALL_64;
-+        ct->regs = ALL_64;
-         break;
-     case 'A': /* qemu_ld/st address constraint */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
-+        ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
-     reserve_helpers:
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
--        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_O1);
-+        tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
-         break;
-     case 's': /* qemu_st data 32-bit constraint */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = 0xffffffff;
-+        ct->regs = 0xffffffff;
-         goto reserve_helpers;
-     case 'S': /* qemu_st data 64-bit constraint */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = ALL_64;
-+        ct->regs = ALL_64;
-         goto reserve_helpers;
-     case 'I':
-         ct->ct |= TCG_CT_CONST_S11;
-diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/tci/tcg-target.c.inc
-+++ b/tcg/tci/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
-     case 'L':                   /* qemu_ld constraint */
-     case 'S':                   /* qemu_st constraint */
-         ct->ct |= TCG_CT_REG;
--        ct->u.regs = BIT(TCG_TARGET_NB_REGS) - 1;
-+        ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
-         break;
-     default:
-         return NULL;
 --
-2.25.1
+2.34.1

-[PULL 07/11] tcg/i386: Fix dupi for avx2 32-bit hosts
+[PULL v2 14/15] tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
-The previous change wrongly stated that 32-bit avx2 should have
+Take the w^x split into account when computing the
-used VPBROADCASTW.  But that's a 16-bit broadcast and we want a
+pc-relative distance to an absolute pointer.
 32-bit broadcast.
-Fixes: 7b60ef3264e
+Reviewed-by: WANG Xuerui <git@xen0n.name>
-Cc: qemu-stable@nongnu.org
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- tcg/i386/tcg-target.c.inc | 2 +-
+ tcg/loongarch64/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
-diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
+diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
---- a/tcg/i386/tcg-target.c.inc
+--- a/tcg/loongarch64/tcg-target.c.inc
-+++ b/tcg/i386/tcg-target.c.inc
++++ b/tcg/loongarch64/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
-         new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
+     intptr_t imm12 = sextreg(offset, 0, 12);
-     } else {
-         if (have_avx2) {
+     if (offset != imm12) {
--            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret);
+-        intptr_t diff = offset - (uintptr_t)s->code_ptr;
-+            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
++        intptr_t diff = tcg_pcrel_diff(s, (void *)offset);
-         } else {
-             tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
+         if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
-         }
+             imm12 = sextreg(diff, 0, 12);
 --
-2.25.1
+2.34.1

-[PULL 01/11] tcg: Adjust simd_desc size encoding
+[PULL v2 15/15] tcg/loongarch64: Reorg goto_tb implementation
-With larger vector sizes, it turns out oprsz == maxsz, and we only
+The old implementation replaces two insns, swapping between
 need to represent mismatch for oprsz <= 32.  We do, however, need
 to represent larger oprsz and do so without reducing SIMD_DATA_BITS.
-Reduce the size of the oprsz field and increase the maxsz field.
+        b       <dest>
-Steal the oprsz value of 24 to indicate equality with maxsz.
+        nop
 and
         pcaddu18i tmp, <dest>
         jirl      zero, tmp, <dest> & 0xffff
-Tested-by: Frank Chang <frank.chang@sifive.com>
+There is a race condition in which a thread could be stopped at
-Reviewed-by: Frank Chang <frank.chang@sifive.com>
+the jirl, i.e. with the top of the address loaded, and when
-Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
+restarted we have re-linked to a different TB, so that the top
 half no longer matches the bottom half.
 Note that while we never directly re-link to a different TB, we
 can link, unlink, and link again all while the stopped thread
 remains stopped.
 The new implementation replaces only one insn, swapping between
         b       <dest>
 and
         pcadd   tmp, <jmp_addr>
 falling through to load the address from tmp, and branch.
 Reviewed-by: WANG Xuerui <git@xen0n.name>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/tcg/tcg-gvec-desc.h | 38 ++++++++++++++++++++++++-------------
+ tcg/loongarch64/tcg-target.h     |  7 +---
- tcg/tcg-op-gvec.c           | 35 ++++++++++++++++++++++++++--------
+ tcg/loongarch64/tcg-target.c.inc | 72 ++++++++++++++------------------
-2 files changed, 52 insertions(+), 21 deletions(-)
+2 files changed, 33 insertions(+), 46 deletions(-)
-diff --git a/include/tcg/tcg-gvec-desc.h b/include/tcg/tcg-gvec-desc.h
+diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
 index XXXXXXX..XXXXXXX 100644
---- a/include/tcg/tcg-gvec-desc.h
+--- a/tcg/loongarch64/tcg-target.h
-+++ b/include/tcg/tcg-gvec-desc.h
++++ b/tcg/loongarch64/tcg-target.h
 @@ -XXX,XX +XXX,XX @@
- #ifndef TCG_TCG_GVEC_DESC_H
- #define TCG_TCG_GVEC_DESC_H
+ #define TCG_TARGET_INSN_UNIT_SIZE 4
+ #define TCG_TARGET_NB_REGS 32
--/* ??? These bit widths are set for ARM SVE, maxing out at 256 byte vectors. */
+-/*
--#define SIMD_OPRSZ_SHIFT   0
+- * PCADDU18I + JIRL sequence can give 20 + 16 + 2 = 38 bits
--#define SIMD_OPRSZ_BITS    5
+- * signed offset, which is +/- 128 GiB.
-+/*
+- */
-+ * This configuration allows MAXSZ to represent 2048 bytes, and
+-#define MAX_CODE_GEN_BUFFER_SIZE  (128 * GiB)
-+ * OPRSZ to match MAXSZ, or represent the smaller values 8, 16, or 32.
++
-+ *
++#define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
-+ * Encode this with:
-+ *   0, 1, 3 -> 8, 16, 32
+ typedef enum {
-+ *   2       -> maxsz
+     TCG_REG_ZERO,
-+ *
+diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
-+ * This steals the input that would otherwise map to 24 to match maxsz.
+index XXXXXXX..XXXXXXX 100644
-+ */
+--- a/tcg/loongarch64/tcg-target.c.inc
-+#define SIMD_MAXSZ_SHIFT   0
++++ b/tcg/loongarch64/tcg-target.c.inc
-+#define SIMD_MAXSZ_BITS    8
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
+ #endif
--#define SIMD_MAXSZ_SHIFT   (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
+ }
--#define SIMD_MAXSZ_BITS    5
-+#define SIMD_OPRSZ_SHIFT   (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
+-/* LoongArch uses `andi zero, zero, 0` as NOP.  */
-+#define SIMD_OPRSZ_BITS    2
+-#define NOP OPC_ANDI
+-static void tcg_out_nop(TCGContext *s)
 -#define SIMD_DATA_SHIFT    (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
 +#define SIMD_DATA_SHIFT    (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
  #define SIMD_DATA_BITS     (32 - SIMD_DATA_SHIFT)
  /* Create a descriptor from components.  */
  uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data);
 -/* Extract the operation size from a descriptor.  */
 -static inline intptr_t simd_oprsz(uint32_t desc)
 -{
--    return (extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS) + 1) * 8;
+-    tcg_out32(s, NOP);
 -}
 -
- /* Extract the max vector size from a descriptor.  */
+-void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
- static inline intptr_t simd_maxsz(uint32_t desc)
+-                              uintptr_t jmp_rx, uintptr_t jmp_rw)
 -{
 -    tcg_insn_unit i1, i2;
 -    ptrdiff_t upper, lower;
 -    uintptr_t addr = tb->jmp_target_addr[n];
 -    ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2;
 -
 -    if (offset == sextreg(offset, 0, 26)) {
 -        i1 = encode_sd10k16_insn(OPC_B, offset);
 -        i2 = NOP;
 -    } else {
 -        tcg_debug_assert(offset == sextreg(offset, 0, 36));
 -        lower = (int16_t)offset;
 -        upper = (offset - lower) >> 16;
 -
 -        i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_TMP0, upper);
 -        i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_TMP0, lower);
 -    }
 -    uint64_t pair = ((uint64_t)i2 << 32) | i1;
 -    qatomic_set((uint64_t *)jmp_rw, pair);
 -    flush_idcache_range(jmp_rx, jmp_rw, 8);
 -}
 -
  /*
   * Entry-points
   */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
  static void tcg_out_goto_tb(TCGContext *s, int which)
  {
--    return (extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) + 1) * 8;
+     /*
-+    return extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) * 8 + 8;
+-     * Ensure that patch area is 8-byte aligned so that an
 -     * atomic write can be used to patch the target address.
 +     * Direct branch, or load indirect address, to be patched
 +     * by tb_target_set_jmp_target.  Check indirect load offset
 +     * in range early, regardless of direct branch distance,
 +     * via assert within tcg_out_opc_pcaddu2i.
       */
 -    if ((uintptr_t)s->code_ptr & 7) {
 -        tcg_out_nop(s);
 -    }
 +    uintptr_t i_addr = get_jmp_target_addr(s, which);
 +    intptr_t i_disp = tcg_pcrel_diff(s, (void *)i_addr);
 +
      set_jmp_insn_offset(s, which);
 -    /*
 -     * actual branch destination will be patched by
 -     * tb_target_set_jmp_target later
 -     */
 -    tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
 +    tcg_out_opc_pcaddu2i(s, TCG_REG_TMP0, i_disp >> 2);
 +
 +    /* Finish the load and indirect branch. */
 +    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_TMP0, 0);
      tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
      set_jmp_reset_offset(s, which);
  }
 +void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
 +                              uintptr_t jmp_rx, uintptr_t jmp_rw)
 +{
 +    uintptr_t d_addr = tb->jmp_target_addr[n];
 +    ptrdiff_t d_disp = (ptrdiff_t)(d_addr - jmp_rx) >> 2;
 +    tcg_insn_unit insn;
 +
 +    /* Either directly branch, or load slot address for indirect branch. */
 +    if (d_disp == sextreg(d_disp, 0, 26)) {
 +        insn = encode_sd10k16_insn(OPC_B, d_disp);
 +    } else {
 +        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
 +        intptr_t i_disp = i_addr - jmp_rx;
 +        insn = encode_dsj20_insn(OPC_PCADDU2I, TCG_REG_TMP0, i_disp >> 2);
 +    }
 +
 +    qatomic_set((tcg_insn_unit *)jmp_rw, insn);
 +    flush_idcache_range(jmp_rx, jmp_rw, 4);
 +}
 +
-+/* Extract the operation size from a descriptor.  */
+ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
-+static inline intptr_t simd_oprsz(uint32_t desc)
+                        const TCGArg args[TCG_MAX_OP_ARGS],
-+{
+                        const int const_args[TCG_MAX_OP_ARGS])
 +    uint32_t f = extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS);
 +    intptr_t o = f * 8 + 8;
 +    intptr_t m = simd_maxsz(desc);
 +    return f == 2 ? m : o;
  }
  /* Extract the operation-specific data from a descriptor.  */
 diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/tcg-op-gvec.c
 +++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ static const TCGOpcode vecop_list_empty[1] = { 0 };
     of the operand offsets so that we can check them all at once.  */
  static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
  {
 -    uint32_t opr_align = oprsz >= 16 ? 15 : 7;
 -    uint32_t max_align = maxsz >= 16 || oprsz >= 16 ? 15 : 7;
 -    tcg_debug_assert(oprsz > 0);
 -    tcg_debug_assert(oprsz <= maxsz);
 -    tcg_debug_assert((oprsz & opr_align) == 0);
 +    uint32_t max_align;
 +
 +    switch (oprsz) {
 +    case 8:
 +    case 16:
 +    case 32:
 +        tcg_debug_assert(oprsz <= maxsz);
 +        break;
 +    default:
 +        tcg_debug_assert(oprsz == maxsz);
 +        break;
 +    }
 +    tcg_debug_assert(maxsz <= (8 << SIMD_MAXSZ_BITS));
 +
 +    max_align = maxsz >= 16 ? 15 : 7;
      tcg_debug_assert((maxsz & max_align) == 0);
      tcg_debug_assert((ofs & max_align) == 0);
  }
@@ -XXX,XX +XXX,XX @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
  {
      uint32_t desc = 0;
 -    assert(oprsz % 8 == 0 && oprsz <= (8 << SIMD_OPRSZ_BITS));
 -    assert(maxsz % 8 == 0 && maxsz <= (8 << SIMD_MAXSZ_BITS));
 -    assert(data == sextract32(data, 0, SIMD_DATA_BITS));
 +    check_size_align(oprsz, maxsz, 0);
 +    tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS));
      oprsz = (oprsz / 8) - 1;
      maxsz = (maxsz / 8) - 1;
 +
 +    /*
 +     * We have just asserted in check_size_align that either
 +     * oprsz is {8,16,32} or matches maxsz.  Encode the final
 +     * case with '2', as that would otherwise map to 24.
 +     */
 +    if (oprsz == maxsz) {
 +        oprsz = 2;
 +    }
 +
      desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
      desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
      desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);
 --
-2.25.1
+2.34.1

The following changes since commit 6eeea6725a70e6fcb5abba0764496bdab07ddfb3:

Merge remote-tracking branch 'remotes/huth-gitlab/tags/pull-request-2020-10-06' into staging (2020-10-06 21:13:34 +0100)

are available in the Git repository at:

https://github.com/rth7680/qemu.git tags/pull-tcg-20201008

for you to fetch changes up to 62475e9d007d83db4d0a6ccebcda8914f392e9c9:

accel/tcg: Fix computing of is_write for MIPS (2020-10-08 05:57:32 -0500)

----------------------------------------------------------------
Extend maximum gvec vector size
Fix i386 avx2 dupi
Fix mips host user-only write detection
Misc cleanups.

----------------------------------------------------------------
Kele Huang (1):
      accel/tcg: Fix computing of is_write for MIPS

Richard Henderson (10):
      tcg: Adjust simd_desc size encoding
      tcg: Drop union from TCGArgConstraint
      tcg: Move sorted_args into TCGArgConstraint.sort_index
      tcg: Remove TCG_CT_REG
      tcg: Move some TCG_CT_* bits to TCGArgConstraint bitfields
      tcg: Remove TCGOpDef.used
      tcg/i386: Fix dupi for avx2 32-bit hosts
      tcg: Fix generation of dupi_vec for 32-bit host
      tcg/optimize: Fold dup2_vec
      tcg: Remove TCG_TARGET_HAS_cmp_vec

With larger vector sizes, it turns out oprsz == maxsz, and we only
need to represent mismatch for oprsz <= 32.  We do, however, need
to represent larger oprsz and do so without reducing SIMD_DATA_BITS.

Reduce the size of the oprsz field and increase the maxsz field.
Steal the oprsz value of 24 to indicate equality with maxsz.

Tested-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-gvec-desc.h | 38 ++++++++++++++++++++++++-------------
 tcg/tcg-op-gvec.c           | 35 ++++++++++++++++++++++++++--------
 2 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/include/tcg/tcg-gvec-desc.h b/include/tcg/tcg-gvec-desc.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-gvec-desc.h
+++ b/include/tcg/tcg-gvec-desc.h
@@ -XXX,XX +XXX,XX @@
 #ifndef TCG_TCG_GVEC_DESC_H
 #define TCG_TCG_GVEC_DESC_H
 
-/* ??? These bit widths are set for ARM SVE, maxing out at 256 byte vectors. */
-#define SIMD_OPRSZ_SHIFT   0
-#define SIMD_OPRSZ_BITS    5
+/*
+ * This configuration allows MAXSZ to represent 2048 bytes, and
+ * OPRSZ to match MAXSZ, or represent the smaller values 8, 16, or 32.
+ *
+ * Encode this with:
+ *   0, 1, 3 -> 8, 16, 32
+ *   2       -> maxsz
+ *
+ * This steals the input that would otherwise map to 24 to match maxsz.
+ */
+#define SIMD_MAXSZ_SHIFT   0
+#define SIMD_MAXSZ_BITS    8
 
-#define SIMD_MAXSZ_SHIFT   (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
-#define SIMD_MAXSZ_BITS    5
+#define SIMD_OPRSZ_SHIFT   (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
+#define SIMD_OPRSZ_BITS    2
 
-#define SIMD_DATA_SHIFT    (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
+#define SIMD_DATA_SHIFT    (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
 #define SIMD_DATA_BITS     (32 - SIMD_DATA_SHIFT)
 
 /* Create a descriptor from components.  */
 uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data);
 
-/* Extract the operation size from a descriptor.  */
-static inline intptr_t simd_oprsz(uint32_t desc)
-{
-    return (extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS) + 1) * 8;
-}
-
 /* Extract the max vector size from a descriptor.  */
 static inline intptr_t simd_maxsz(uint32_t desc)
 {
-    return (extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) + 1) * 8;
+    return extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) * 8 + 8;
+}
+
+/* Extract the operation size from a descriptor.  */
+static inline intptr_t simd_oprsz(uint32_t desc)
+{
+    uint32_t f = extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS);
+    intptr_t o = f * 8 + 8;
+    intptr_t m = simd_maxsz(desc);
+    return f == 2 ? m : o;
 }
 
 /* Extract the operation-specific data from a descriptor.  */
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ static const TCGOpcode vecop_list_empty[1] = { 0 };
    of the operand offsets so that we can check them all at once.  */
 static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
 {
-    uint32_t opr_align = oprsz >= 16 ? 15 : 7;
-    uint32_t max_align = maxsz >= 16 || oprsz >= 16 ? 15 : 7;
-    tcg_debug_assert(oprsz > 0);
-    tcg_debug_assert(oprsz <= maxsz);
-    tcg_debug_assert((oprsz & opr_align) == 0);
+    uint32_t max_align;
+
+    switch (oprsz) {
+    case 8:
+    case 16:
+    case 32:
+        tcg_debug_assert(oprsz <= maxsz);
+        break;
+    default:
+        tcg_debug_assert(oprsz == maxsz);
+        break;
+    }
+    tcg_debug_assert(maxsz <= (8 << SIMD_MAXSZ_BITS));
+
+    max_align = maxsz >= 16 ? 15 : 7;
     tcg_debug_assert((maxsz & max_align) == 0);
     tcg_debug_assert((ofs & max_align) == 0);
 }
@@ -XXX,XX +XXX,XX @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
 {
     uint32_t desc = 0;
 
-    assert(oprsz % 8 == 0 && oprsz <= (8 << SIMD_OPRSZ_BITS));
-    assert(maxsz % 8 == 0 && maxsz <= (8 << SIMD_MAXSZ_BITS));
-    assert(data == sextract32(data, 0, SIMD_DATA_BITS));
+    check_size_align(oprsz, maxsz, 0);
+    tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS));
 
     oprsz = (oprsz / 8) - 1;
     maxsz = (maxsz / 8) - 1;
+
+    /*
+     * We have just asserted in check_size_align that either
+     * oprsz is {8,16,32} or matches maxsz.  Encode the final
+     * case with '2', as that would otherwise map to 24.
+     */
+    if (oprsz == maxsz) {
+        oprsz = 2;
+    }
+
     desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
     desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
     desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);
-- 
2.25.1

The union is unused; let "regs" appear in the main structure
without the "u.regs" wrapping.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h            |  4 +---
 tcg/tcg.c                    | 22 +++++++++++-----------
 tcg/aarch64/tcg-target.c.inc | 14 +++++++-------
 tcg/arm/tcg-target.c.inc     | 26 +++++++++++++-------------
 tcg/i386/tcg-target.c.inc    | 26 +++++++++++++-------------
 tcg/mips/tcg-target.c.inc    | 18 +++++++++---------
 tcg/ppc/tcg-target.c.inc     | 24 ++++++++++++------------
 tcg/riscv/tcg-target.c.inc   | 14 +++++++-------
 tcg/s390/tcg-target.c.inc    | 18 +++++++++---------
 tcg/sparc/tcg-target.c.inc   | 16 ++++++++--------
 tcg/tci/tcg-target.c.inc     |  2 +-
 11 files changed, 91 insertions(+), 93 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
 typedef struct TCGArgConstraint {
     uint16_t ct;
     uint8_t alias_index;
-    union {
-        TCGRegSet regs;
-    } u;
+    TCGRegSet regs;
 } TCGArgConstraint;
 
 #define TCG_MAX_OP_ARGS 16
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
             return 0;
         n = 0;
         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
-            if (tcg_regset_test_reg(arg_ct->u.regs, i))
+            if (tcg_regset_test_reg(arg_ct->regs, i))
                 n++;
         }
     }
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
             /* Incomplete TCGTargetOpDef entry. */
             tcg_debug_assert(ct_str != NULL);
 
-            def->args_ct[i].u.regs = 0;
+            def->args_ct[i].regs = 0;
             def->args_ct[i].ct = 0;
             while (*ct_str != '\0') {
                 switch(*ct_str) {
@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
                     pset = la_temp_pref(ts);
                     set = *pset;
 
-                    set &= ct->u.regs;
+                    set &= ct->regs;
                     if (ct->ct & TCG_CT_IALIAS) {
                         set &= op->output_pref[ct->alias_index];
                     }
                     /* If the combination is not possible, restart.  */
                     if (set == 0) {
-                        set = ct->u.regs;
+                        set = ct->regs;
                     }
                     *pset = set;
                 }
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
         return;
     }
 
-    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
-    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
+    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
+    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
 
     /* Allocate the output register now.  */
     if (ots->val_type != TEMP_VAL_REG) {
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
             }
         }
 
-        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
+        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
         reg = ts->reg;
 
-        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
+        if (tcg_regset_test_reg(arg_ct->regs, reg)) {
             /* nothing to do : the constraint is satisfied */
         } else {
         allocate_in_reg:
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
                and move the temporary register into it */
             temp_load(s, ts, tcg_target_available_regs[ts->type],
                       i_allocated_regs, 0);
-            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
+            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                 o_preferred_regs, ts->indirect_base);
             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                 /*
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
                 && !const_args[arg_ct->alias_index]) {
                 reg = new_args[arg_ct->alias_index];
             } else if (arg_ct->ct & TCG_CT_NEWREG) {
-                reg = tcg_reg_alloc(s, arg_ct->u.regs,
+                reg = tcg_reg_alloc(s, arg_ct->regs,
                                     i_allocated_regs | o_allocated_regs,
                                     op->output_pref[k], ts->indirect_base);
             } else {
-                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
+                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                     op->output_pref[k], ts->indirect_base);
             }
             tcg_regset_set_reg(o_allocated_regs, reg);
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     switch (*ct_str++) {
     case 'r': /* general registers */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs |= 0xffffffffu;
+        ct->regs |= 0xffffffffu;
         break;
     case 'w': /* advsimd registers */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs |= 0xffffffff00000000ull;
+        ct->regs |= 0xffffffff00000000ull;
         break;
     case 'l': /* qemu_ld / qemu_st address, data_reg */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffffu;
+        ct->regs = 0xffffffffu;
 #ifdef CONFIG_SOFTMMU
         /* x0 and x1 will be overwritten when reading the tlb entry,
            and x2, and x3 for helper args, better to avoid using them. */
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_X0);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_X1);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_X2);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_X3);
 #endif
         break;
     case 'A': /* Valid for arithmetic immediate (positive or negative).  */
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 
     case 'r':
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffff;
+        ct->regs = 0xffff;
         break;
 
     /* qemu_ld address */
     case 'l':
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffff;
+        ct->regs = 0xffff;
 #ifdef CONFIG_SOFTMMU
         /* r0-r2,lr will be overwritten when reading the tlb entry,
            so don't use these. */
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
 #endif
         break;
 
     /* qemu_st address & data */
     case 's':
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffff;
+        ct->regs = 0xffff;
         /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
            and r0-r1 doing the byte swapping, so don't use these. */
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
 #if defined(CONFIG_SOFTMMU)
         /* Avoid clashes with registers being used for helper args */
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
 #if TARGET_LONG_BITS == 64
         /* Avoid clashes with registers being used for helper args */
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
 #endif
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
 #endif
         break;
 
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     switch(*ct_str++) {
     case 'a':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
+        tcg_regset_set_reg(ct->regs, TCG_REG_EAX);
         break;
     case 'b':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
+        tcg_regset_set_reg(ct->regs, TCG_REG_EBX);
         break;
     case 'c':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
+        tcg_regset_set_reg(ct->regs, TCG_REG_ECX);
         break;
     case 'd':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
+        tcg_regset_set_reg(ct->regs, TCG_REG_EDX);
         break;
     case 'S':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
+        tcg_regset_set_reg(ct->regs, TCG_REG_ESI);
         break;
     case 'D':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
+        tcg_regset_set_reg(ct->regs, TCG_REG_EDI);
         break;
     case 'q':
         /* A register that can be used as a byte operand.  */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
+        ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
         break;
     case 'Q':
         /* A register with an addressable second byte (e.g. %ah).  */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xf;
+        ct->regs = 0xf;
         break;
     case 'r':
         /* A general register.  */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs |= ALL_GENERAL_REGS;
+        ct->regs |= ALL_GENERAL_REGS;
         break;
     case 'W':
         /* With TZCNT/LZCNT, we can have operand-size as an input.  */
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     case 'x':
         /* A vector register.  */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs |= ALL_VECTOR_REGS;
+        ct->regs |= ALL_VECTOR_REGS;
         break;
 
         /* qemu_ld/st address constraint */
     case 'L':
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
+        ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
+        tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
         break;
 
     case 'e':
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     switch(*ct_str++) {
     case 'r':
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff;
+        ct->regs = 0xffffffff;
         break;
     case 'L': /* qemu_ld input arg constraint */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff;
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
+        ct->regs = 0xffffffff;
+        tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
 #if defined(CONFIG_SOFTMMU)
         if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
+            tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
         }
 #endif
         break;
     case 'S': /* qemu_st constraint */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff;
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
+        ct->regs = 0xffffffff;
+        tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
 #if defined(CONFIG_SOFTMMU)
         if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
+            tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
+            tcg_regset_reset_reg(ct->regs, TCG_REG_A3);
         } else {
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
+            tcg_regset_reset_reg(ct->regs, TCG_REG_A1);
         }
 #endif
         break;
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     switch (*ct_str++) {
     case 'A': case 'B': case 'C': case 'D':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
+        tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
         break;
     case 'r':
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff;
+        ct->regs = 0xffffffff;
         break;
     case 'v':
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff00000000ull;
+        ct->regs = 0xffffffff00000000ull;
         break;
     case 'L':                   /* qemu_ld constraint */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff;
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
+        ct->regs = 0xffffffff;
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
 #ifdef CONFIG_SOFTMMU
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
 #endif
         break;
     case 'S':                   /* qemu_st constraint */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff;
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
+        ct->regs = 0xffffffff;
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
 #ifdef CONFIG_SOFTMMU
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R6);
 #endif
         break;
     case 'I':
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     switch (*ct_str++) {
     case 'r':
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff;
+        ct->regs = 0xffffffff;
         break;
     case 'L':
         /* qemu_ld/qemu_st constraint */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff;
+        ct->regs = 0xffffffff;
         /* qemu_ld/qemu_st uses TCG_REG_TMP0 */
 #if defined(CONFIG_SOFTMMU)
-        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
-        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
-        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
-        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[3]);
-        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[4]);
+        tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[0]);
+        tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[1]);
+        tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[2]);
+        tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[3]);
+        tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[4]);
 #endif
         break;
     case 'I':
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390/tcg-target.c.inc
+++ b/tcg/s390/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     switch (*ct_str++) {
     case 'r':                  /* all registers */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffff;
+        ct->regs = 0xffff;
         break;
     case 'L':                  /* qemu_ld/st constraint */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffff;
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
+        ct->regs = 0xffff;
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
         break;
     case 'a':                  /* force R2 for division */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_R2);
+        ct->regs = 0;
+        tcg_regset_set_reg(ct->regs, TCG_REG_R2);
         break;
     case 'b':                  /* force R3 for division */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
+        ct->regs = 0;
+        tcg_regset_set_reg(ct->regs, TCG_REG_R3);
         break;
     case 'A':
         ct->ct |= TCG_CT_CONST_S33;
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     switch (*ct_str++) {
     case 'r':
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff;
+        ct->regs = 0xffffffff;
         break;
     case 'R':
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = ALL_64;
+        ct->regs = ALL_64;
         break;
     case 'A': /* qemu_ld/st address constraint */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
+        ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
     reserve_helpers:
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_O1);
+        tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
         break;
     case 's': /* qemu_st data 32-bit constraint */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = 0xffffffff;
+        ct->regs = 0xffffffff;
         goto reserve_helpers;
     case 'S': /* qemu_st data 64-bit constraint */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = ALL_64;
+        ct->regs = ALL_64;
         goto reserve_helpers;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     case 'L':                   /* qemu_ld constraint */
     case 'S':                   /* qemu_st constraint */
         ct->ct |= TCG_CT_REG;
-        ct->u.regs = BIT(TCG_TARGET_NB_REGS) - 1;
+        ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
         break;
     default:
         return NULL;
-- 
2.25.1

This uses an existing hole in the TCGArgConstraint structure
and will be convenient for keeping the data in one place.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |  2 +-
 tcg/tcg.c         | 35 +++++++++++++++++------------------
 2 files changed, 18 insertions(+), 19 deletions(-)

TCG_CT_REG wasn't actually used for anything, really.  All variable
operands must accept registers, which are indicated by the
set in TCGArgConstraint.regs.

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
 #define TCG_CT_ALIAS  0x80
 #define TCG_CT_IALIAS 0x40
 #define TCG_CT_NEWREG 0x20 /* output requires a new register */
-#define TCG_CT_REG    0x01
 #define TCG_CT_CONST  0x02 /* any constant of register size */
 
 typedef struct TCGArgConstraint {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
 /* we give more priority to constraints with less registers */
 static int get_constraint_priority(const TCGOpDef *def, int k)
 {
-    const TCGArgConstraint *arg_ct;
+    const TCGArgConstraint *arg_ct = &def->args_ct[k];
+    int n;
 
-    int i, n;
-    arg_ct = &def->args_ct[k];
     if (arg_ct->ct & TCG_CT_ALIAS) {
         /* an alias is equivalent to a single register */
         n = 1;
     } else {
-        if (!(arg_ct->ct & TCG_CT_REG))
-            return 0;
-        n = 0;
-        for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
-            if (tcg_regset_test_reg(arg_ct->regs, i))
-                n++;
-        }
+        n = ctpop64(arg_ct->regs);
     }
     return TCG_TARGET_NB_REGS - n + 1;
 }
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
                         int oarg = *ct_str - '0';
                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                         tcg_debug_assert(oarg < def->nb_oargs);
-                        tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
+                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                         /* TCG_CT_ALIAS is for the output arguments.
                            The input is tagged with TCG_CT_IALIAS. */
                         def->args_ct[i] = def->args_ct[oarg];
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 {
     switch (*ct_str++) {
     case 'r': /* general registers */
-        ct->ct |= TCG_CT_REG;
         ct->regs |= 0xffffffffu;
         break;
     case 'w': /* advsimd registers */
-        ct->ct |= TCG_CT_REG;
         ct->regs |= 0xffffffff00000000ull;
         break;
     case 'l': /* qemu_ld / qemu_st address, data_reg */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffffu;
 #ifdef CONFIG_SOFTMMU
         /* x0 and x1 will be overwritten when reading the tlb entry,
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
         break;
 
     case 'r':
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffff;
         break;
 
     /* qemu_ld address */
     case 'l':
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffff;
 #ifdef CONFIG_SOFTMMU
         /* r0-r2,lr will be overwritten when reading the tlb entry,
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 
     /* qemu_st address & data */
     case 's':
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffff;
         /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
            and r0-r1 doing the byte swapping, so don't use these. */
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 {
     switch(*ct_str++) {
     case 'a':
-        ct->ct |= TCG_CT_REG;
         tcg_regset_set_reg(ct->regs, TCG_REG_EAX);
         break;
     case 'b':
-        ct->ct |= TCG_CT_REG;
         tcg_regset_set_reg(ct->regs, TCG_REG_EBX);
         break;
     case 'c':
-        ct->ct |= TCG_CT_REG;
         tcg_regset_set_reg(ct->regs, TCG_REG_ECX);
         break;
     case 'd':
-        ct->ct |= TCG_CT_REG;
         tcg_regset_set_reg(ct->regs, TCG_REG_EDX);
         break;
     case 'S':
-        ct->ct |= TCG_CT_REG;
         tcg_regset_set_reg(ct->regs, TCG_REG_ESI);
         break;
     case 'D':
-        ct->ct |= TCG_CT_REG;
         tcg_regset_set_reg(ct->regs, TCG_REG_EDI);
         break;
     case 'q':
         /* A register that can be used as a byte operand.  */
-        ct->ct |= TCG_CT_REG;
         ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
         break;
     case 'Q':
         /* A register with an addressable second byte (e.g. %ah).  */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xf;
         break;
     case 'r':
         /* A general register.  */
-        ct->ct |= TCG_CT_REG;
         ct->regs |= ALL_GENERAL_REGS;
         break;
     case 'W':
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
         break;
     case 'x':
         /* A vector register.  */
-        ct->ct |= TCG_CT_REG;
         ct->regs |= ALL_VECTOR_REGS;
         break;
 
         /* qemu_ld/st address constraint */
     case 'L':
-        ct->ct |= TCG_CT_REG;
         ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
         tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
         tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 {
     switch(*ct_str++) {
     case 'r':
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff;
         break;
     case 'L': /* qemu_ld input arg constraint */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff;
         tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
 #if defined(CONFIG_SOFTMMU)
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 #endif
         break;
     case 'S': /* qemu_st constraint */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff;
         tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
 #if defined(CONFIG_SOFTMMU)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 {
     switch (*ct_str++) {
     case 'A': case 'B': case 'C': case 'D':
-        ct->ct |= TCG_CT_REG;
         tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
         break;
     case 'r':
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff;
         break;
     case 'v':
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff00000000ull;
         break;
     case 'L':                   /* qemu_ld constraint */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff;
         tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
 #ifdef CONFIG_SOFTMMU
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 #endif
         break;
     case 'S':                   /* qemu_st constraint */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff;
         tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
 #ifdef CONFIG_SOFTMMU
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 {
     switch (*ct_str++) {
     case 'r':
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff;
         break;
     case 'L':
         /* qemu_ld/qemu_st constraint */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff;
         /* qemu_ld/qemu_st uses TCG_REG_TMP0 */
 #if defined(CONFIG_SOFTMMU)
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390/tcg-target.c.inc
+++ b/tcg/s390/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 {
     switch (*ct_str++) {
     case 'r':                  /* all registers */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffff;
         break;
     case 'L':                  /* qemu_ld/st constraint */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffff;
         tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
         tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
         tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
         break;
     case 'a':                  /* force R2 for division */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0;
         tcg_regset_set_reg(ct->regs, TCG_REG_R2);
         break;
     case 'b':                  /* force R3 for division */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0;
         tcg_regset_set_reg(ct->regs, TCG_REG_R3);
         break;
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
 {
     switch (*ct_str++) {
     case 'r':
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff;
         break;
     case 'R':
-        ct->ct |= TCG_CT_REG;
         ct->regs = ALL_64;
         break;
     case 'A': /* qemu_ld/st address constraint */
-        ct->ct |= TCG_CT_REG;
         ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
     reserve_helpers:
         tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
         tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
         break;
     case 's': /* qemu_st data 32-bit constraint */
-        ct->ct |= TCG_CT_REG;
         ct->regs = 0xffffffff;
         goto reserve_helpers;
     case 'S': /* qemu_st data 64-bit constraint */
-        ct->ct |= TCG_CT_REG;
         ct->regs = ALL_64;
         goto reserve_helpers;
     case 'I':
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     case 'r':
     case 'L':                   /* qemu_ld constraint */
     case 'S':                   /* qemu_st constraint */
-        ct->ct |= TCG_CT_REG;
         ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
         break;
     default:
-- 
2.25.1

These are easier to set and test when they have their own fields.
Reduce the size of alias_index and sort_index to 4 bits, which is
sufficient for TCG_MAX_OP_ARGS.  This leaves only the bits indicating
constants within the ct field.

Move all initialization to allocation time, rather than initializing
individual fields in process_op_defs.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 14 +++++++-------
 tcg/tcg.c         | 28 ++++++++++++----------------
 2 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void);
 void tcg_dump_info(void);
 void tcg_dump_op_count(void);
 
-#define TCG_CT_ALIAS  0x80
-#define TCG_CT_IALIAS 0x40
-#define TCG_CT_NEWREG 0x20 /* output requires a new register */
-#define TCG_CT_CONST  0x02 /* any constant of register size */
+#define TCG_CT_CONST  1 /* any constant of register size */
 
 typedef struct TCGArgConstraint {
-    uint16_t ct;
-    uint8_t alias_index;
-    uint8_t sort_index;
+    unsigned ct : 16;
+    unsigned alias_index : 4;
+    unsigned sort_index : 4;
+    bool oalias : 1;
+    bool ialias : 1;
+    bool newreg : 1;
     TCGRegSet regs;
 } TCGArgConstraint;
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
         total_args += n;
     }
 
-    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
+    args_ct = g_new0(TCGArgConstraint, total_args);
 
     for(op = 0; op < NB_OPS; op++) {
         def = &tcg_op_defs[op];
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
     const TCGArgConstraint *arg_ct = &def->args_ct[k];
     int n;
 
-    if (arg_ct->ct & TCG_CT_ALIAS) {
+    if (arg_ct->oalias) {
         /* an alias is equivalent to a single register */
         n = 1;
     } else {
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
             /* Incomplete TCGTargetOpDef entry. */
             tcg_debug_assert(ct_str != NULL);
 
-            def->args_ct[i].regs = 0;
-            def->args_ct[i].ct = 0;
             while (*ct_str != '\0') {
                 switch(*ct_str) {
                 case '0' ... '9':
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                         tcg_debug_assert(oarg < def->nb_oargs);
                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
-                        /* TCG_CT_ALIAS is for the output arguments.
-                           The input is tagged with TCG_CT_IALIAS. */
                         def->args_ct[i] = def->args_ct[oarg];
-                        def->args_ct[oarg].ct |= TCG_CT_ALIAS;
+                        /* The output sets oalias.  */
+                        def->args_ct[oarg].oalias = true;
                         def->args_ct[oarg].alias_index = i;
-                        def->args_ct[i].ct |= TCG_CT_IALIAS;
+                        /* The input sets ialias. */
+                        def->args_ct[i].ialias = true;
                         def->args_ct[i].alias_index = oarg;
                     }
                     ct_str++;
                     break;
                 case '&':
-                    def->args_ct[i].ct |= TCG_CT_NEWREG;
+                    def->args_ct[i].newreg = true;
                     ct_str++;
                     break;
                 case 'i':
@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
                     set = *pset;
 
                     set &= ct->regs;
-                    if (ct->ct & TCG_CT_IALIAS) {
+                    if (ct->ialias) {
                         set &= op->output_pref[ct->alias_index];
                     }
                     /* If the combination is not possible, restart.  */
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
         }
 
         i_preferred_regs = o_preferred_regs = 0;
-        if (arg_ct->ct & TCG_CT_IALIAS) {
+        if (arg_ct->ialias) {
             o_preferred_regs = op->output_pref[arg_ct->alias_index];
             if (ts->fixed_reg) {
                 /* if fixed register, we must allocate a new register
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
                     reg = ts->reg;
                     for (k2 = 0 ; k2 < k ; k2++) {
                         i2 = def->args_ct[nb_oargs + k2].sort_index;
-                        if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
-                            reg == new_args[i2]) {
+                        if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                             goto allocate_in_reg;
                         }
                     }
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
             /* ENV should not be modified.  */
             tcg_debug_assert(!ts->fixed_reg);
 
-            if ((arg_ct->ct & TCG_CT_ALIAS)
-                && !const_args[arg_ct->alias_index]) {
+            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                 reg = new_args[arg_ct->alias_index];
-            } else if (arg_ct->ct & TCG_CT_NEWREG) {
+            } else if (arg_ct->newreg) {
                 reg = tcg_reg_alloc(s, arg_ct->regs,
                                     i_allocated_regs | o_allocated_regs,
                                     op->output_pref[k], ts->indirect_base);
-- 
2.25.1

The definition of INDEX_op_dupi_vec is that it operates on
units of tcg_target_ulong -- in this case 32 bits.  It does
not work to use this for a uint64_t value that happens to be
small enough to fit in tcg_target_ulong.

Fixes: d2fd745fe8b
Fixes: db432672dc5
Cc: qemu-stable@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-op-vec.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
 
 void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
 {
-    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
-        do_dupi_vec(r, MO_32, a);
-    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
+    if (TCG_TARGET_REG_BITS == 64) {
         do_dupi_vec(r, MO_64, a);
+    } else if (a == dup_const(MO_32, a)) {
+        do_dupi_vec(r, MO_32, a);
     } else {
         TCGv_i64 c = tcg_const_i64(a);
         tcg_gen_dup_i64_vec(MO_64, r, c);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
 
 void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
 {
-    do_dupi_vec(r, MO_REG, dup_const(vece, a));
+    if (vece == MO_64) {
+        tcg_gen_dup64i_vec(r, a);
+    } else {
+        do_dupi_vec(r, MO_REG, dup_const(vece, a));
+    }
 }
 
 void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
-- 
2.25.1

When the two arguments are identical, this can be reduced to
dup_vec or to mov_vec from a tcg_constant_vec.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
             }
             goto do_default;
 
+        case INDEX_op_dup2_vec:
+            assert(TCG_TARGET_REG_BITS == 32);
+            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
+                tmp = arg_info(op->args[1])->val;
+                if (tmp == arg_info(op->args[2])->val) {
+                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
+                    break;
+                }
+            } else if (args_are_copies(op->args[1], op->args[2])) {
+                op->opc = INDEX_op_dup_vec;
+                TCGOP_VECE(op) = MO_32;
+                nb_iargs = 1;
+            }
+            goto do_default;
+
         CASE_OP_32_64(not):
         CASE_OP_32_64(neg):
         CASE_OP_32_64(ext8s):
-- 
2.25.1

The cmp_vec opcode is mandatory; this symbol is unused.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.h | 1 -
 tcg/i386/tcg-target.h    | 1 -
 tcg/ppc/tcg-target.h     | 1 -
 3 files changed, 3 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define TCG_TARGET_HAS_shi_vec          1
 #define TCG_TARGET_HAS_shs_vec          0
 #define TCG_TARGET_HAS_shv_vec          1
-#define TCG_TARGET_HAS_cmp_vec          1
 #define TCG_TARGET_HAS_mul_vec          1
 #define TCG_TARGET_HAS_sat_vec          1
 #define TCG_TARGET_HAS_minmax_vec       1
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
 #define TCG_TARGET_HAS_shi_vec          1
 #define TCG_TARGET_HAS_shs_vec          1
 #define TCG_TARGET_HAS_shv_vec          have_avx2
-#define TCG_TARGET_HAS_cmp_vec          1
 #define TCG_TARGET_HAS_mul_vec          1
 #define TCG_TARGET_HAS_sat_vec          1
 #define TCG_TARGET_HAS_minmax_vec       1
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
 #define TCG_TARGET_HAS_shi_vec          0
 #define TCG_TARGET_HAS_shs_vec          0
 #define TCG_TARGET_HAS_shv_vec          1
-#define TCG_TARGET_HAS_cmp_vec          1
 #define TCG_TARGET_HAS_mul_vec          1
 #define TCG_TARGET_HAS_sat_vec          1
 #define TCG_TARGET_HAS_minmax_vec       1
-- 
2.25.1

From: Kele Huang <kele.hwang@gmail.com>

Detect all MIPS store instructions in cpu_signal_handler for all available
MIPS versions, and set is_write if encountering such store instructions.

This fixes the error when dealing with self-modifying code for MIPS.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Kele Huang <kele.hwang@gmail.com>
Signed-off-by: Xu Zou <iwatchnima@gmail.com>
Message-Id: <20201002081420.10814-1-kele.hwang@gmail.com>
[rth: Use uintptr_t for pc to fix n32 build error.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/user-exec.c | 43 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
 
 #elif defined(__mips__)
 
+#if defined(__mips16) || defined(__mips_micromips)
+#error "Unsupported encoding"
+#endif /* __mips16 || __mips_micromips: decoder below reads 32-bit insns */
+
 int cpu_signal_handler(int host_signum, void *pinfo,
                        void *puc)
 {
     siginfo_t *info = pinfo;
     ucontext_t *uc = puc;
-    greg_t pc = uc->uc_mcontext.pc;
-    int is_write;
+    uintptr_t pc = uc->uc_mcontext.pc;
+    uint32_t insn = *(uint32_t *)pc;
+    int is_write = 0;
+
+    /* Detect all store instructions at program counter. */
+    switch((insn >> 26) & 077) {
+    case 050: /* SB */
+    case 051: /* SH */
+    case 052: /* SWL */
+    case 053: /* SW */
+    case 054: /* SDL */
+    case 055: /* SDR */
+    case 056: /* SWR */
+    case 070: /* SC */
+    case 071: /* SWC1 */
+    case 074: /* SCD */
+    case 075: /* SDC1 */
+    case 077: /* SD */
+#if !defined(__mips_isa_rev) || __mips_isa_rev < 6
+    case 072: /* SWC2 */
+    case 076: /* SDC2 */
+#endif
+        is_write = 1;
+        break;
+    case 023: /* COP1X */
+        /* Required in all versions of MIPS64 since
+           MIPS64r1 and subsequent versions of MIPS32r2. */
+        switch (insn & 077) {
+        case 010: /* SWXC1 */
+        case 011: /* SDXC1 */
+        case 015: /* SUXC1 */
+            is_write = 1;
+        }
+        break;
+    }
 
-    /* XXX: compute is_write */
-    is_write = 0;
     return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
 }
 
-- 
2.25.1

Second try's the charm today, right?

The following changes since commit 00b1faea41d283e931256aa78aa975a369ec3ae6:

Merge tag 'pull-target-arm-20230123' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-01-23 13:40:28 +0000)

are available in the Git repository at:

https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230123

for you to fetch changes up to 709bcd7da3f6b4655d910634a0d520fa1439df38:

tcg/loongarch64: Reorg goto_tb implementation (2023-01-23 16:00:13 -1000)

----------------------------------------------------------------
common-user: Re-enable ppc32 host
tcg: Avoid recursion in tcg_gen_mulu2_i32
tcg: Mark tcg helpers noinline to avoid an issue with LTO
tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
disas: Enable loongarch disassembler, and fixes
tcg/loongarch64: Improve move immediate
tcg/loongarch64: Improve add immediate
tcg/loongarch64: Improve setcond
tcg/loongarch64: Implement movcond
tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
tcg/loongarch64: Reorg goto_tb implementation

----------------------------------------------------------------
Richard Henderson (14):
      tcg: Avoid recursion in tcg_gen_mulu2_i32
      tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
      common-user/host/ppc: Implement safe-syscall.inc.S
      linux-user: Implement host/ppc/host-signal.h
      tcg: Mark tcg helpers noinline to avoid an issue with LTO
      target/loongarch: Enable the disassembler for host tcg
      target/loongarch: Disassemble jirl properly
      target/loongarch: Disassemble pcadd* addresses
      tcg/loongarch64: Update tcg-insn-defs.c.inc
      tcg/loongarch64: Introduce tcg_out_addi
      tcg/loongarch64: Improve setcond expansion
      tcg/loongarch64: Implement movcond
      tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
      tcg/loongarch64: Reorg goto_tb implementation

Rui Wang (1):
      tcg/loongarch64: Optimize immediate loading

include/exec/helper-proto.h                    |  32 ++-
 include/tcg/tcg.h                              |   7 -
 linux-user/include/host/ppc/host-signal.h      |  39 +++
 tcg/arm/tcg-target-con-set.h                   |   7 +-
 tcg/arm/tcg-target-con-str.h                   |   2 +
 tcg/loongarch64/tcg-target-con-set.h           |   5 +-
 tcg/loongarch64/tcg-target-con-str.h           |   2 +-
 tcg/loongarch64/tcg-target.h                   |  11 +-
 target/loongarch/insns.decode                  |   3 +-
 disas.c                                        |   2 +
 target/loongarch/disas.c                       |  39 ++-
 tcg/tcg-op.c                                   |   4 +-
 target/loongarch/insn_trans/trans_branch.c.inc |   2 +-
 tcg/arm/tcg-target.c.inc                       |  28 +-
 tcg/loongarch64/tcg-insn-defs.c.inc            |  10 +-
 tcg/loongarch64/tcg-target.c.inc               | 364 ++++++++++++++++---------
 common-user/host/ppc/safe-syscall.inc.S        | 107 ++++++++
 target/loongarch/meson.build                   |   3 +-
 18 files changed, 497 insertions(+), 170 deletions(-)
 create mode 100644 linux-user/include/host/ppc/host-signal.h
 create mode 100644 common-user/host/ppc/safe-syscall.inc.S

We have a test for one of TCG_TARGET_HAS_mulu2_i32 or
TCG_TARGET_HAS_muluh_i32 being defined, but the test
became non-functional when we changed to always define
all of these macros.

Replace this with a build-time test in tcg_gen_mulu2_i32.

Fixes: 25c4d9cc845 ("tcg: Always define all of the TCGOpcode enum members.")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1435
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 7 -------
 tcg/tcg-op.c      | 4 +++-
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_rem_i64          0
 #endif
 
-/* For 32-bit targets, some sort of unsigned widening multiply is required.  */
-#if TCG_TARGET_REG_BITS == 32 \
-    && !(defined(TCG_TARGET_HAS_mulu2_i32) \
-         || defined(TCG_TARGET_HAS_muluh_i32))
-# error "Missing unsigned widening multiply"
-#endif
-
 #if !defined(TCG_TARGET_HAS_v64) \
     && !defined(TCG_TARGET_HAS_v128) \
     && !defined(TCG_TARGET_HAS_v256)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
         tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
         tcg_gen_mov_i32(rl, t);
         tcg_temp_free_i32(t);
-    } else {
+    } else if (TCG_TARGET_REG_BITS == 64) {
         TCGv_i64 t0 = tcg_temp_new_i64();
         TCGv_i64 t1 = tcg_temp_new_i64();
         tcg_gen_extu_i32_i64(t0, arg1);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
         tcg_gen_extr_i64_i32(rl, rh, t0);
         tcg_temp_free_i64(t0);
         tcg_temp_free_i64(t1);
+    } else {
+        qemu_build_not_reached();
     }
 }
 
-- 
2.34.1

Although we still can't use ldrd and strd for all operations,
increase the chances by getting the register allocation correct.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target-con-set.h |  7 ++++---
 tcg/arm/tcg-target-con-str.h |  2 ++
 tcg/arm/tcg-target.c.inc     | 28 ++++++++++++++++++----------
 3 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target-con-set.h
+++ b/tcg/arm/tcg-target-con-set.h
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, rIN)
 C_O0_I2(s, s)
 C_O0_I2(w, r)
 C_O0_I3(s, s, s)
+C_O0_I3(S, p, s)
 C_O0_I4(r, r, rI, rI)
-C_O0_I4(s, s, s, s)
+C_O0_I4(S, p, s, s)
 C_O1_I1(r, l)
 C_O1_I1(r, r)
 C_O1_I1(w, r)
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wZ)
 C_O1_I3(w, w, w, w)
 C_O1_I4(r, r, r, rI, rI)
 C_O1_I4(r, r, rIN, rIK, 0)
-C_O2_I1(r, r, l)
-C_O2_I2(r, r, l, l)
+C_O2_I1(e, p, l)
+C_O2_I2(e, p, l, l)
 C_O2_I2(r, r, r, r)
 C_O2_I4(r, r, r, r, rIN, rIK)
 C_O2_I4(r, r, rI, rI, rIN, rIK)
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target-con-str.h
+++ b/tcg/arm/tcg-target-con-str.h
@@ -XXX,XX +XXX,XX @@
  * Define constraint letters for register sets:
  * REGS(letter, register_mask)
  */
+REGS('e', ALL_GENERAL_REGS & 0x5555) /* even regs */
 REGS('r', ALL_GENERAL_REGS)
 REGS('l', ALL_QLOAD_REGS)
 REGS('s', ALL_QSTORE_REGS)
+REGS('S', ALL_QSTORE_REGS & 0x5555)  /* even qstore */
 REGS('w', ALL_VECTOR_REGS)
 
 /*
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
         tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
         break;
     case MO_UQ:
+        /* We used pair allocation for datalo, so already should be aligned. */
+        tcg_debug_assert((datalo & 1) == 0);
+        tcg_debug_assert(datahi == datalo + 1);
         /* LDRD requires alignment; double-check that. */
-        if (get_alignment_bits(opc) >= MO_64
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
+        if (get_alignment_bits(opc) >= MO_64) {
             /*
              * Rm (the second address op) must not overlap Rt or Rt + 1.
              * Since datalo is aligned, we can simplify the test via alignment.
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
         tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
         break;
     case MO_UQ:
+        /* We used pair allocation for datalo, so already should be aligned. */
+        tcg_debug_assert((datalo & 1) == 0);
+        tcg_debug_assert(datahi == datalo + 1);
         /* LDRD requires alignment; double-check that. */
-        if (get_alignment_bits(opc) >= MO_64
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
+        if (get_alignment_bits(opc) >= MO_64) {
             tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
         } else if (datalo == addrlo) {
             tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
         tcg_out_st32_r(s, cond, datalo, addrlo, addend);
         break;
     case MO_64:
+        /* We used pair allocation for datalo, so already should be aligned. */
+        tcg_debug_assert((datalo & 1) == 0);
+        tcg_debug_assert(datahi == datalo + 1);
         /* STRD requires alignment; double-check that. */
-        if (get_alignment_bits(opc) >= MO_64
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
+        if (get_alignment_bits(opc) >= MO_64) {
             tcg_out_strd_r(s, cond, datalo, addrlo, addend);
         } else if (scratch_addend) {
             tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
         tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
         break;
     case MO_64:
+        /* We used pair allocation for datalo, so already should be aligned. */
+        tcg_debug_assert((datalo & 1) == 0);
+        tcg_debug_assert(datahi == datalo + 1);
         /* STRD requires alignment; double-check that. */
-        if (get_alignment_bits(opc) >= MO_64
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
+        if (get_alignment_bits(opc) >= MO_64) {
             tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
         } else {
             tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_qemu_ld_i32:
         return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
     case INDEX_op_qemu_ld_i64:
-        return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l);
+        return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l);
     case INDEX_op_qemu_st_i32:
         return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
     case INDEX_op_qemu_st_i64:
-        return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
+        return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s);
 
     case INDEX_op_st_vec:
         return C_O0_I2(w, r);
-- 
2.34.1

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Message-Id: <20220729172141.1789105-2-richard.henderson@linaro.org>
---
 common-user/host/ppc/safe-syscall.inc.S | 107 ++++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 common-user/host/ppc/safe-syscall.inc.S

diff --git a/common-user/host/ppc/safe-syscall.inc.S b/common-user/host/ppc/safe-syscall.inc.S
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/common-user/host/ppc/safe-syscall.inc.S
@@ -XXX,XX +XXX,XX @@
+/*
+ * safe-syscall.inc.S : host-specific assembly fragment
+ * to handle signals occurring at the same time as system calls.
+ * This is intended to be included by common-user/safe-syscall.S
+ *
+ * Copyright (C) 2022 Linaro, Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * Standardize on the _CALL_FOO symbols used by GCC:
+ * Apple XCode does not define _CALL_DARWIN.
+ * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
+ */
+#if !defined(_CALL_SYSV) && \
+    !defined(_CALL_DARWIN) && \
+    !defined(_CALL_AIX) && \
+    !defined(_CALL_ELF)
+# if defined(__APPLE__)
+#  define _CALL_DARWIN
+# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
+#  define _CALL_SYSV
+# else
+#  error "Unknown ABI"
+# endif
+#endif 
+
+#ifndef _CALL_SYSV
+# error "Unsupported ABI"
+#endif
+
+
+        .global safe_syscall_base
+        .global safe_syscall_start
+        .global safe_syscall_end
+        .type   safe_syscall_base, @function
+
+        .text
+
+        /*
+         * This is the entry point for making a system call. The calling
+         * convention here is that of a C varargs function with the
+         * first argument an 'int *' to the signal_pending flag, the
+         * second one the system call number (as a 'long'), and all further
+         * arguments being syscall arguments (also 'long').
+         */
+safe_syscall_base:
+        .cfi_startproc
+        stwu    1, -8(1)
+        .cfi_def_cfa_offset 8
+        stw     30, 4(1)
+        .cfi_offset 30, -4
+
+        /*
+         * We enter with r3 == &signal_pending
+         *               r4 == syscall number
+         *               r5 ... r10 == syscall arguments
+         *               and return the result in r3
+         * and the syscall instruction needs
+         *               r0 == syscall number
+         *               r3 ... r8 == syscall arguments
+         *               and returns the result in r3
+         * Shuffle everything around appropriately.
+         */
+        mr      30, 3           /* signal_pending */
+        mr      0, 4            /* syscall number */
+        mr      3, 5            /* syscall arguments */
+        mr      4, 6
+        mr      5, 7
+        mr      6, 8
+        mr      7, 9
+        mr      8, 10
+
+        /*
+         * This next sequence of code works in conjunction with the
+         * rewind_if_safe_syscall_function(). If a signal is taken
+         * and the interrupted PC is anywhere between 'safe_syscall_start'
+         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
+         * The code sequence must therefore be able to cope with this, and
+         * the syscall instruction must be the final one in the sequence.
+         */
+safe_syscall_start:
+        /* if signal_pending is non-zero, don't do the call */
+        lwz     12, 0(30)
+        cmpwi   0, 12, 0
+        bne-    2f
+        sc
+safe_syscall_end:
+        /* code path when we did execute the syscall */
+        lwz     30, 4(1)        /* restore r30 */
+        addi    1, 1, 8         /* restore stack */
+        .cfi_restore 30
+        .cfi_def_cfa_offset 0
+        bnslr+                  /* return on success */
+        b       safe_syscall_set_errno_tail
+
+        /* code path when we didn't execute the syscall */
+2:      lwz     30, 4(1)
+        addi    1, 1, 8
+        addi    3, 0, QEMU_ERESTARTSYS
+        b       safe_syscall_set_errno_tail
+
+        .cfi_endproc
+
+        .size   safe_syscall_base, .-safe_syscall_base
-- 
2.34.1

This commit re-enables ppc32 as a linux-user host,
as existence of the directory is noted by configure.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1097
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Message-Id: <20220729172141.1789105-3-richard.henderson@linaro.org>
---
 linux-user/include/host/ppc/host-signal.h | 39 +++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 linux-user/include/host/ppc/host-signal.h

diff --git a/linux-user/include/host/ppc/host-signal.h b/linux-user/include/host/ppc/host-signal.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/linux-user/include/host/ppc/host-signal.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2022 Linaro Ltd.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_HOST_SIGNAL_H
+#define PPC_HOST_SIGNAL_H
+
+#include <asm/ptrace.h>
+
+/* The third argument to a SA_SIGINFO handler is ucontext_t. */
+typedef ucontext_t host_sigcontext;
+
+static inline uintptr_t host_signal_pc(host_sigcontext *uc)
+{
+    return uc->uc_mcontext.regs->nip;
+}
+
+static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc)
+{
+    uc->uc_mcontext.regs->nip = pc;
+}
+
+static inline void *host_signal_mask(host_sigcontext *uc)
+{
+    return &uc->uc_sigmask;
+}
+
+static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc)
+{
+    return uc->uc_mcontext.regs->trap != 0x400
+        && (uc->uc_mcontext.regs->dsisr & 0x02000000);
+}
+
+#endif
-- 
2.34.1

Marking helpers __attribute__((noinline)) prevents an issue
with GCC's ipa-split pass under --enable-lto.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1454
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Idan Horowitz <idan.horowitz@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/helper-proto.h | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/helper-proto.h
+++ b/include/exec/helper-proto.h
@@ -XXX,XX +XXX,XX @@
 
 #include "exec/helper-head.h"
 
+/*
+ * Work around an issue with --enable-lto, in which GCC's ipa-split pass
+ * decides to split out the noreturn code paths that raise an exception,
+ * taking the __builtin_return_address() along into the new function,
+ * where it no longer computes a value that returns to TCG generated code.
+ * Despite the name, the noinline attribute affects the splitter, so this
+ * prevents the optimization in question.  Given that helpers should not
+ * otherwise be called directly, this should have no other visible effect.
+ *
+ * See https://gitlab.com/qemu-project/qemu/-/issues/1454
+ */
+#define DEF_HELPER_ATTR  __attribute__((noinline))
+
 #define DEF_HELPER_FLAGS_0(name, flags, ret) \
-dh_ctype(ret) HELPER(name) (void);
+dh_ctype(ret) HELPER(name) (void) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1));
+dh_ctype(ret) HELPER(name) (dh_ctype(t1)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2));
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3));
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), \
+                            dh_ctype(t3)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                                   dh_ctype(t4));
+                            dh_ctype(t4)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                            dh_ctype(t4), dh_ctype(t5));
+                            dh_ctype(t4), dh_ctype(t5)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                            dh_ctype(t4), dh_ctype(t5), dh_ctype(t6));
+                            dh_ctype(t4), dh_ctype(t5), \
+                            dh_ctype(t6)) DEF_HELPER_ATTR;
 
 #define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
                             dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
-                            dh_ctype(t7));
+                            dh_ctype(t7)) DEF_HELPER_ATTR;
 
 #define IN_HELPER_PROTO
 
@@ -XXX,XX +XXX,XX @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
 #undef DEF_HELPER_FLAGS_5
 #undef DEF_HELPER_FLAGS_6
 #undef DEF_HELPER_FLAGS_7
+#undef DEF_HELPER_ATTR
 
 #endif /* HELPER_PROTO_H */
-- 
2.34.1

Reuse the decodetree based disassembler from
target/loongarch/ for tcg/loongarch64/.

The generation of decode-insns.c.inc into ./libcommon.fa.p/ could
eventually result in conflict, if any other host requires the same
trick, but this is good enough for now.

Reviewed-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 disas.c                      | 2 ++
 target/loongarch/meson.build | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/disas.c b/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/disas.c
+++ b/disas.c
@@ -XXX,XX +XXX,XX @@ static void initialize_debug_host(CPUDebug *s)
     s->info.cap_insn_split = 6;
 #elif defined(__hppa__)
     s->info.print_insn = print_insn_hppa;
+#elif defined(__loongarch__)
+    s->info.print_insn = print_insn_loongarch;
 #endif
 }
 
diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/meson.build
+++ b/target/loongarch/meson.build
@@ -XXX,XX +XXX,XX @@ gen = decodetree.process('insns.decode')
 loongarch_ss = ss.source_set()
 loongarch_ss.add(files(
   'cpu.c',
-  'disas.c',
 ))
 loongarch_tcg_ss = ss.source_set()
 loongarch_tcg_ss.add(gen)
@@ -XXX,XX +XXX,XX @@ loongarch_softmmu_ss.add(files(
   'iocsr_helper.c',
 ))
 
+common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen])
+
 loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss])
 
 target_arch += {'loongarch': loongarch_ss}
-- 
2.34.1

While jirl shares the same instruction format as bne etc,
it is not assembled the same.  In particular, rd is printed
first not second and the immediate is not pc-relative.

Decode into the arg_rr_i structure, which prints correctly.
This changes the "offs" member to "imm", to update translate.

Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/loongarch/insns.decode                  | 3 ++-
 target/loongarch/disas.c                       | 2 +-
 target/loongarch/insn_trans/trans_branch.c.inc | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@
 @rr_ui12                 .... ...... imm:12 rj:5 rd:5    &rr_i
 @rr_i14s2         .... ....  .............. rj:5 rd:5    &rr_i imm=%i14s2
 @rr_i16                     .... .. imm:s16 rj:5 rd:5    &rr_i
+@rr_i16s2         .... ..  ................ rj:5 rd:5    &rr_i imm=%offs16
 @hint_r_i12           .... ...... imm:s12 rj:5 hint:5    &hint_r_i
 @rrr_sa2p1        .... ........ ... .. rk:5 rj:5 rd:5    &rrr_sa  sa=%sa2p1
 @rrr_sa2        .... ........ ... sa:2 rk:5 rj:5 rd:5    &rrr_sa
@@ -XXX,XX +XXX,XX @@ beqz            0100 00 ................ ..... .....     @r_offs21
 bnez            0100 01 ................ ..... .....     @r_offs21
 bceqz           0100 10 ................ 00 ... .....    @c_offs21
 bcnez           0100 10 ................ 01 ... .....    @c_offs21
-jirl            0100 11 ................ ..... .....     @rr_offs16
+jirl            0100 11 ................ ..... .....     @rr_i16s2
 b               0101 00 ..........................       @offs26
 bl              0101 01 ..........................       @offs26
 beq             0101 10 ................ ..... .....     @rr_offs16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN(beqz,         r_offs)
 INSN(bnez,         r_offs)
 INSN(bceqz,        c_offs)
 INSN(bcnez,        c_offs)
-INSN(jirl,         rr_offs)
+INSN(jirl,         rr_i)
 INSN(b,            offs)
 INSN(bl,           offs)
 INSN(beq,          rr_offs)
diff --git a/target/loongarch/insn_trans/trans_branch.c.inc b/target/loongarch/insn_trans/trans_branch.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_branch.c.inc
+++ b/target/loongarch/insn_trans/trans_branch.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_jirl(DisasContext *ctx, arg_jirl *a)
     TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
     TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
 
-    tcg_gen_addi_tl(cpu_pc, src1, a->offs);
+    tcg_gen_addi_tl(cpu_pc, src1, a->imm);
     tcg_gen_movi_tl(dest, ctx->base.pc_next + 4);
     gen_set_gpr(a->rd, dest, EXT_NONE);
     tcg_gen_lookup_and_goto_ptr();
-- 
2.34.1

Print both the raw field and the resolved pc-relative
address, as we do for branches.

Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/loongarch/disas.c | 37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN(fsel,         fffc)
 INSN(addu16i_d,    rr_i)
 INSN(lu12i_w,      r_i)
 INSN(lu32i_d,      r_i)
-INSN(pcaddi,       r_i)
-INSN(pcalau12i,    r_i)
-INSN(pcaddu12i,    r_i)
-INSN(pcaddu18i,    r_i)
 INSN(ll_w,         rr_i)
 INSN(sc_w,         rr_i)
 INSN(ll_d,         rr_i)
@@ -XXX,XX +XXX,XX @@ static bool trans_fcmp_cond_##suffix(DisasContext *ctx, \
 
 FCMP_INSN(s)
 FCMP_INSN(d)
+
+#define PCADD_INSN(name)                                        \
+static bool trans_##name(DisasContext *ctx, arg_##name *a)      \
+{                                                               \
+    output(ctx, #name, "r%d, %d # 0x%" PRIx64,                  \
+           a->rd, a->imm, gen_##name(ctx->pc, a->imm));         \
+    return true;                                                \
+}
+
+static uint64_t gen_pcaddi(uint64_t pc, int imm)
+{
+    return pc + (imm << 2);
+}
+
+static uint64_t gen_pcalau12i(uint64_t pc, int imm)
+{
+    return (pc + (imm << 12)) & ~0xfff;
+}
+
+static uint64_t gen_pcaddu12i(uint64_t pc, int imm)
+{
+    return pc + (imm << 12);
+}
+
+static uint64_t gen_pcaddu18i(uint64_t pc, int imm)
+{
+    return pc + ((uint64_t)(imm) << 18);
+}
+
+PCADD_INSN(pcaddi)
+PCADD_INSN(pcalau12i)
+PCADD_INSN(pcaddu12i)
+PCADD_INSN(pcaddu18i)
-- 
2.34.1

From: Rui Wang <wangrui@loongson.cn>

diff:
  Imm                 Before                  After
  0000000000000000    addi.w  rd, zero, 0     addi.w  rd, zero, 0
                      lu52i.d rd, zero, 0
  00000000fffff800    lu12i.w rd, -1          addi.w  rd, zero, -2048
                      ori     rd, rd, 2048    lu32i.d rd, 0
                      lu32i.d rd, 0

Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Rui Wang <wangrui@loongson.cn>
Message-Id: <20221107144713.845550-1-wangrui@loongson.cn>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 35 +++++++++++---------------------
 1 file changed, 12 insertions(+), 23 deletions(-)

Regenerate with ADDU16I included:

   $ cd loongarch-opcodes/scripts/go
   $ go run ./genqemutcgdefs > $QEMU/tcg/loongarch64/tcg-insn-defs.c.inc

Reviewed-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-insn-defs.c.inc | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc b/tcg/loongarch64/tcg-insn-defs.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-insn-defs.c.inc
+++ b/tcg/loongarch64/tcg-insn-defs.c.inc
@@ -XXX,XX +XXX,XX @@
  *
  * This file is auto-generated by genqemutcgdefs from
  * https://github.com/loongson-community/loongarch-opcodes,
- * from commit 961f0c60f5b63e574d785995600c71ad5413fdc4.
+ * from commit 25ca7effe9d88101c1cf96c4005423643386d81f.
  * DO NOT EDIT.
  */
 
@@ -XXX,XX +XXX,XX @@ typedef enum {
     OPC_ANDI = 0x03400000,
     OPC_ORI = 0x03800000,
     OPC_XORI = 0x03c00000,
+    OPC_ADDU16I_D = 0x10000000,
     OPC_LU12I_W = 0x14000000,
     OPC_CU32I_D = 0x16000000,
     OPC_PCADDU2I = 0x18000000,
@@ -XXX,XX +XXX,XX @@ tcg_out_opc_xori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12)
     tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12));
 }
 
+/* Emits the `addu16i.d d, j, sk16` instruction.  */
+static void __attribute__((unused))
+tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+{
+    tcg_out32(s, encode_djsk16_insn(OPC_ADDU16I_D, d, j, sk16));
+}
+
 /* Emits the `lu12i.w d, sj20` instruction.  */
 static void __attribute__((unused))
 tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20)
-- 
2.34.1

Adjust the constraints to allow any int32_t for immediate
addition.  Split immediate adds into addu16i + addi, which
covers quite a lot of the immediate space.  Constants that
cannot be expressed as such a sum fall into the hole between
the two fields (bits 12-15); for those, load the constant
into TMP0 and use a register add instead.

Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target-con-set.h |  4 +-
 tcg/loongarch64/tcg-target-con-str.h |  2 +-
 tcg/loongarch64/tcg-target.c.inc     | 57 ++++++++++++++++++++++++----
 3 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -XXX,XX +XXX,XX @@ C_O1_I1(r, L)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
+C_O1_I2(r, r, rJ)
 C_O1_I2(r, r, rU)
 C_O1_I2(r, r, rW)
 C_O1_I2(r, r, rZ)
 C_O1_I2(r, 0, rZ)
-C_O1_I2(r, rZ, rN)
+C_O1_I2(r, rZ, ri)
+C_O1_I2(r, rZ, rJ)
 C_O1_I2(r, rZ, rZ)
diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target-con-str.h
+++ b/tcg/loongarch64/tcg-target-con-str.h
@@ -XXX,XX +XXX,XX @@ REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
  * CONST(letter, TCG_CT_CONST_* bit set)
  */
 CONST('I', TCG_CT_CONST_S12)
-CONST('N', TCG_CT_CONST_N12)
+CONST('J', TCG_CT_CONST_S32)
 CONST('U', TCG_CT_CONST_U12)
 CONST('Z', TCG_CT_CONST_ZERO)
 CONST('C', TCG_CT_CONST_C12)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_oarg_regs[] = {
 
 #define TCG_CT_CONST_ZERO  0x100
 #define TCG_CT_CONST_S12   0x200
-#define TCG_CT_CONST_N12   0x400
+#define TCG_CT_CONST_S32   0x400
 #define TCG_CT_CONST_U12   0x800
 #define TCG_CT_CONST_C12   0x1000
 #define TCG_CT_CONST_WSZ   0x2000
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
     if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) {
         return true;
     }
-    if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) {
+    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
         return true;
     }
     if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
     }
 }
 
+static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd,
+                         TCGReg rs, tcg_target_long imm)
+{
+    tcg_target_long lo12 = sextreg(imm, 0, 12);
+    tcg_target_long hi16 = sextreg(imm - lo12, 16, 16);
+
+    /*
+     * Note that there's a hole in between hi16 and lo12:
+     *
+     *       3                   2                   1                   0
+     *     1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+     * ...+-------------------------------+-------+-----------------------+
+     *    |             hi16              |       |          lo12         |
+     * ...+-------------------------------+-------+-----------------------+
+     *
+     * For bits within that hole, it's more efficient to use LU12I and ADD.
+     */
+    if (imm == (hi16 << 16) + lo12) {
+        if (hi16) {
+            tcg_out_opc_addu16i_d(s, rd, rs, hi16);
+            rs = rd;
+        }
+        if (type == TCG_TYPE_I32) {
+            tcg_out_opc_addi_w(s, rd, rs, lo12);
+        } else if (lo12) {
+            tcg_out_opc_addi_d(s, rd, rs, lo12);
+        } else {
+            tcg_out_mov(s, type, rd, rs);
+        }
+    } else {
+        tcg_out_movi(s, type, TCG_REG_TMP0, imm);
+        if (type == TCG_TYPE_I32) {
+            tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0);
+        } else {
+            tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0);
+        }
+    }
+}
+
 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
 {
     tcg_out_opc_andi(s, ret, arg, 0xff);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_add_i32:
         if (c2) {
-            tcg_out_opc_addi_w(s, a0, a1, a2);
+            tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2);
         } else {
             tcg_out_opc_add_w(s, a0, a1, a2);
         }
         break;
     case INDEX_op_add_i64:
         if (c2) {
-            tcg_out_opc_addi_d(s, a0, a1, a2);
+            tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2);
         } else {
             tcg_out_opc_add_d(s, a0, a1, a2);
         }
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_sub_i32:
         if (c2) {
-            tcg_out_opc_addi_w(s, a0, a1, -a2);
+            tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2);
         } else {
             tcg_out_opc_sub_w(s, a0, a1, a2);
         }
         break;
     case INDEX_op_sub_i64:
         if (c2) {
-            tcg_out_opc_addi_d(s, a0, a1, -a2);
+            tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2);
         } else {
             tcg_out_opc_sub_d(s, a0, a1, a2);
         }
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O1_I2(r, r, ri);
 
     case INDEX_op_add_i32:
+        return C_O1_I2(r, r, ri);
     case INDEX_op_add_i64:
-        return C_O1_I2(r, r, rI);
+        return C_O1_I2(r, r, rJ);
 
     case INDEX_op_and_i32:
     case INDEX_op_and_i64:
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O1_I2(r, 0, rZ);
 
     case INDEX_op_sub_i32:
+        return C_O1_I2(r, rZ, ri);
     case INDEX_op_sub_i64:
-        return C_O1_I2(r, rZ, rN);
+        return C_O1_I2(r, rZ, rJ);
 
     case INDEX_op_mul_i32:
     case INDEX_op_mul_i64:
-- 
2.34.1

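Split out a helper function, tcg_out_setcond_int, which
does not always produce the complete boolean result.  Instead
it returns the register holding an intermediate result, combined
with flags (SETCOND_INV, SETCOND_NEZ) that tell the caller how
to turn that intermediate into the final boolean.

Accept all int32_t as constant input, so that LE/GT (and
LEU/GTU) can adjust the constant by one and use LT (LTU).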

Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 165 +++++++++++++++++++++----------
 1 file changed, 115 insertions(+), 50 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
     tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
 }
 
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
-                            TCGReg arg1, TCGReg arg2, bool c2)
-{
-    TCGReg tmp;
+#define SETCOND_INV    TCG_TARGET_NB_REGS
+#define SETCOND_NEZ    (SETCOND_INV << 1)
+#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
 
-    if (c2) {
-        tcg_debug_assert(arg2 == 0);
+static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
+                               TCGReg arg1, tcg_target_long arg2, bool c2)
+{
+    int flags = 0;
+
+    switch (cond) {
+    case TCG_COND_EQ:    /* -> NE  */
+    case TCG_COND_GE:    /* -> LT  */
+    case TCG_COND_GEU:   /* -> LTU */
+    case TCG_COND_GT:    /* -> LE  */
+    case TCG_COND_GTU:   /* -> LEU */
+        cond = tcg_invert_cond(cond);
+        flags ^= SETCOND_INV;
+        break;
+    default:
+        break;
     }
 
     switch (cond) {
-    case TCG_COND_EQ:
-        if (c2) {
-            tmp = arg1;
-        } else {
-            tcg_out_opc_sub_d(s, ret, arg1, arg2);
-            tmp = ret;
-        }
-        tcg_out_opc_sltui(s, ret, tmp, 1);
-        break;
-    case TCG_COND_NE:
-        if (c2) {
-            tmp = arg1;
-        } else {
-            tcg_out_opc_sub_d(s, ret, arg1, arg2);
-            tmp = ret;
-        }
-        tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp);
-        break;
-    case TCG_COND_LT:
-        tcg_out_opc_slt(s, ret, arg1, arg2);
-        break;
-    case TCG_COND_GE:
-        tcg_out_opc_slt(s, ret, arg1, arg2);
-        tcg_out_opc_xori(s, ret, ret, 1);
-        break;
     case TCG_COND_LE:
-        tcg_out_setcond(s, TCG_COND_GE, ret, arg2, arg1, false);
-        break;
-    case TCG_COND_GT:
-        tcg_out_setcond(s, TCG_COND_LT, ret, arg2, arg1, false);
-        break;
-    case TCG_COND_LTU:
-        tcg_out_opc_sltu(s, ret, arg1, arg2);
-        break;
-    case TCG_COND_GEU:
-        tcg_out_opc_sltu(s, ret, arg1, arg2);
-        tcg_out_opc_xori(s, ret, ret, 1);
-        break;
     case TCG_COND_LEU:
-        tcg_out_setcond(s, TCG_COND_GEU, ret, arg2, arg1, false);
+        /*
+         * If we have a constant input, the most efficient way to implement
+         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
+         * We don't need to care for this for LE because the constant input
+         * is still constrained to int32_t, and INT32_MAX+1 is representable
+         * in the 64-bit temporary register.
+         */
+        if (c2) {
+            if (cond == TCG_COND_LEU) {
+                /* unsigned <= -1 is true */
+                if (arg2 == -1) {
+                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
+                    return ret;
+                }
+                cond = TCG_COND_LTU;
+            } else {
+                cond = TCG_COND_LT;
+            }
+            arg2 += 1;
+        } else {
+            TCGReg tmp = arg2;
+            arg2 = arg1;
+            arg1 = tmp;
+            cond = tcg_swap_cond(cond);    /* LE -> GE */
+            cond = tcg_invert_cond(cond);  /* GE -> LT */
+            flags ^= SETCOND_INV;
+        }
         break;
-    case TCG_COND_GTU:
-        tcg_out_setcond(s, TCG_COND_LTU, ret, arg2, arg1, false);
+    default:
         break;
+    }
+
+    switch (cond) {
+    case TCG_COND_NE:
+        flags |= SETCOND_NEZ;
+        if (!c2) {
+            tcg_out_opc_xor(s, ret, arg1, arg2);
+        } else if (arg2 == 0) {
+            ret = arg1;
+        } else if (arg2 >= 0 && arg2 <= 0xfff) {
+            tcg_out_opc_xori(s, ret, arg1, arg2);
+        } else {
+            tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2);
+        }
+        break;
+
+    case TCG_COND_LT:
+    case TCG_COND_LTU:
+        if (c2) {
+            if (arg2 >= -0x800 && arg2 <= 0x7ff) {
+                if (cond == TCG_COND_LT) {
+                    tcg_out_opc_slti(s, ret, arg1, arg2);
+                } else {
+                    tcg_out_opc_sltui(s, ret, arg1, arg2);
+                }
+                break;
+            }
+            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
+            arg2 = TCG_REG_TMP0;
+        }
+        if (cond == TCG_COND_LT) {
+            tcg_out_opc_slt(s, ret, arg1, arg2);
+        } else {
+            tcg_out_opc_sltu(s, ret, arg1, arg2);
+        }
+        break;
+
     default:
         g_assert_not_reached();
         break;
     }
+
+    return ret | flags;
+}
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGReg arg1, tcg_target_long arg2, bool c2)
+{
+    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
+
+    if (tmpflags != ret) {
+        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
+
+        switch (tmpflags & SETCOND_FLAGS) {
+        case SETCOND_INV:
+            /* Intermediate result is boolean: simply invert. */
+            tcg_out_opc_xori(s, ret, tmp, 1);
+            break;
+        case SETCOND_NEZ:
+            /* Intermediate result is zero/non-zero: test != 0. */
+            tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp);
+            break;
+        case SETCOND_NEZ | SETCOND_INV:
+            /* Intermediate result is zero/non-zero: test == 0. */
+            tcg_out_opc_sltui(s, ret, tmp, 1);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
 }
 
 /*
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_ctz_i64:
         return C_O1_I2(r, r, rW);
 
-    case INDEX_op_setcond_i32:
-    case INDEX_op_setcond_i64:
-        return C_O1_I2(r, r, rZ);
-
     case INDEX_op_deposit_i32:
     case INDEX_op_deposit_i64:
         /* Must deposit into the same register as input */
         return C_O1_I2(r, 0, rZ);
 
     case INDEX_op_sub_i32:
+    case INDEX_op_setcond_i32:
         return C_O1_I2(r, rZ, ri);
     case INDEX_op_sub_i64:
+    case INDEX_op_setcond_i64:
         return C_O1_I2(r, rZ, rJ);
 
     case INDEX_op_mul_i32:
-- 
2.34.1

Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target-con-set.h |  1 +
 tcg/loongarch64/tcg-target.h         |  4 ++--
 tcg/loongarch64/tcg-target.c.inc     | 33 ++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 2 deletions(-)

The old implementation replaces two insns, swapping between

        b       <dest>
        nop
and
        pcaddu18i tmp, <dest>
        jirl      zero, tmp, <dest> & 0xffff

There is a race condition in which a thread could be stopped at
the jirl, i.e. with the top of the address loaded, and when
restarted we have re-linked to a different TB, so that the top
half no longer matches the bottom half.

Note that while we never directly re-link to a different TB, we
can link, unlink, and link again all while the stopped thread
remains stopped.

The new implementation replaces only one insn, swapping between

        b        <dest>
and
        pcaddu2i tmp, <jmp_addr>

falling through to load the destination from the address in tmp,
and branch to it indirectly.

Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.h     |  7 +---
 tcg/loongarch64/tcg-target.c.inc | 72 ++++++++++++++------------------
 2 files changed, 33 insertions(+), 46 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_NB_REGS 32
-/*
- * PCADDU18I + JIRL sequence can give 20 + 16 + 2 = 38 bits
- * signed offset, which is +/- 128 GiB.
- */
-#define MAX_CODE_GEN_BUFFER_SIZE  (128 * GiB)
+
+#define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 
 typedef enum {
     TCG_REG_ZERO,
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
 #endif
 }
 
-/* LoongArch uses `andi zero, zero, 0` as NOP.  */
-#define NOP OPC_ANDI
-static void tcg_out_nop(TCGContext *s)
-{
-    tcg_out32(s, NOP);
-}
-
-void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
-                              uintptr_t jmp_rx, uintptr_t jmp_rw)
-{
-    tcg_insn_unit i1, i2;
-    ptrdiff_t upper, lower;
-    uintptr_t addr = tb->jmp_target_addr[n];
-    ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2;
-
-    if (offset == sextreg(offset, 0, 26)) {
-        i1 = encode_sd10k16_insn(OPC_B, offset);
-        i2 = NOP;
-    } else {
-        tcg_debug_assert(offset == sextreg(offset, 0, 36));
-        lower = (int16_t)offset;
-        upper = (offset - lower) >> 16;
-
-        i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_TMP0, upper);
-        i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_TMP0, lower);
-    }
-    uint64_t pair = ((uint64_t)i2 << 32) | i1;
-    qatomic_set((uint64_t *)jmp_rw, pair);
-    flush_idcache_range(jmp_rx, jmp_rw, 8);
-}
-
 /*
  * Entry-points
  */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
 static void tcg_out_goto_tb(TCGContext *s, int which)
 {
     /*
-     * Ensure that patch area is 8-byte aligned so that an
-     * atomic write can be used to patch the target address.
+     * Direct branch, or load indirect address, to be patched
+     * by tb_target_set_jmp_target.  Check indirect load offset
+     * in range early, regardless of direct branch distance,
+     * via assert within tcg_out_opc_pcaddu2i.
      */
-    if ((uintptr_t)s->code_ptr & 7) {
-        tcg_out_nop(s);
-    }
+    uintptr_t i_addr = get_jmp_target_addr(s, which);
+    intptr_t i_disp = tcg_pcrel_diff(s, (void *)i_addr);
+
     set_jmp_insn_offset(s, which);
-    /*
-     * actual branch destination will be patched by
-     * tb_target_set_jmp_target later
-     */
-    tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
+    tcg_out_opc_pcaddu2i(s, TCG_REG_TMP0, i_disp >> 2);
+
+    /* Finish the load and indirect branch. */
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_TMP0, 0);
     tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
     set_jmp_reset_offset(s, which);
 }
 
+void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
+                              uintptr_t jmp_rx, uintptr_t jmp_rw)
+{
+    uintptr_t d_addr = tb->jmp_target_addr[n];
+    ptrdiff_t d_disp = (ptrdiff_t)(d_addr - jmp_rx) >> 2;
+    tcg_insn_unit insn;
+
+    /* Either directly branch, or load slot address for indirect branch. */
+    if (d_disp == sextreg(d_disp, 0, 26)) {
+        insn = encode_sd10k16_insn(OPC_B, d_disp);
+    } else {
+        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
+        intptr_t i_disp = i_addr - jmp_rx;
+        insn = encode_dsj20_insn(OPC_PCADDU2I, TCG_REG_TMP0, i_disp >> 2);
+    }
+
+    qatomic_set((tcg_insn_unit *)jmp_rw, insn);
+    flush_idcache_range(jmp_rx, jmp_rw, 4);
+}
+
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                        const TCGArg args[TCG_MAX_OP_ARGS],
                        const int const_args[TCG_MAX_OP_ARGS])
-- 
2.34.1