The following changes since commit 6eeea6725a70e6fcb5abba0764496bdab07ddfb3:

  Merge remote-tracking branch 'remotes/huth-gitlab/tags/pull-request-2020-10-06' into staging (2020-10-06 21:13:34 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20201008

for you to fetch changes up to 62475e9d007d83db4d0a6ccebcda8914f392e9c9:

  accel/tcg: Fix computing of is_write for MIPS (2020-10-08 05:57:32 -0500)

----------------------------------------------------------------
Extend maximum gvec vector size
Fix i386 avx2 dupi
Fix mips host user-only write detection
Misc cleanups.

----------------------------------------------------------------
Kele Huang (1):
      accel/tcg: Fix computing of is_write for MIPS

Richard Henderson (10):
      tcg: Adjust simd_desc size encoding
      tcg: Drop union from TCGArgConstraint
      tcg: Move sorted_args into TCGArgConstraint.sort_index
      tcg: Remove TCG_CT_REG
      tcg: Move some TCG_CT_* bits to TCGArgConstraint bitfields
      tcg: Remove TCGOpDef.used
      tcg/i386: Fix dupi for avx2 32-bit hosts
      tcg: Fix generation of dupi_vec for 32-bit host
      tcg/optimize: Fold dup2_vec
      tcg: Remove TCG_TARGET_HAS_cmp_vec

 include/tcg/tcg-gvec-desc.h  | 38 ++++++++++++------
 include/tcg/tcg.h            | 22 ++++------
 tcg/aarch64/tcg-target.h     |  1 -
 tcg/i386/tcg-target.h        |  1 -
 tcg/ppc/tcg-target.h         |  1 -
 accel/tcg/user-exec.c        | 43 ++++++++++++++++++--
 tcg/optimize.c               | 15 +++++++
 tcg/tcg-op-gvec.c            | 35 ++++++++++++----
 tcg/tcg-op-vec.c             | 12 ++++--
 tcg/tcg.c                    | 96 +++++++++++++++++++-------------------------
 tcg/aarch64/tcg-target.c.inc | 17 ++++----
 tcg/arm/tcg-target.c.inc     | 29 ++++++-------
 tcg/i386/tcg-target.c.inc    | 39 +++++++-----------
 tcg/mips/tcg-target.c.inc    | 21 +++++-----
 tcg/ppc/tcg-target.c.inc     | 29 ++++++-------
 tcg/riscv/tcg-target.c.inc   | 16 ++++----
 tcg/s390/tcg-target.c.inc    | 22 +++++-----
 tcg/sparc/tcg-target.c.inc   | 21 ++++------
 tcg/tci/tcg-target.c.inc     |  3 +-
 19 files changed, 244 insertions(+), 217 deletions(-)
New patch
With larger vector sizes, it turns out oprsz == maxsz, and we only
need to represent mismatch for oprsz <= 32.  We do, however, need
to represent larger oprsz and do so without reducing SIMD_DATA_BITS.

Reduce the size of the oprsz field and increase the maxsz field.
Steal the oprsz value of 24 to indicate equality with maxsz.

Tested-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-gvec-desc.h | 38 ++++++++++++++++++++++++-------------
 tcg/tcg-op-gvec.c           | 35 ++++++++++++++++++++++++++--------
 2 files changed, 52 insertions(+), 21 deletions(-)
diff --git a/include/tcg/tcg-gvec-desc.h b/include/tcg/tcg-gvec-desc.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/tcg/tcg-gvec-desc.h
20
+++ b/include/tcg/tcg-gvec-desc.h
21
@@ -XXX,XX +XXX,XX @@
22
#ifndef TCG_TCG_GVEC_DESC_H
23
#define TCG_TCG_GVEC_DESC_H
24
25
-/* ??? These bit widths are set for ARM SVE, maxing out at 256 byte vectors. */
26
-#define SIMD_OPRSZ_SHIFT 0
27
-#define SIMD_OPRSZ_BITS 5
28
+/*
29
+ * This configuration allows MAXSZ to represent 2048 bytes, and
30
+ * OPRSZ to match MAXSZ, or represent the smaller values 8, 16, or 32.
31
+ *
32
+ * Encode this with:
33
+ * 0, 1, 3 -> 8, 16, 32
34
+ * 2 -> maxsz
35
+ *
36
+ * This steals the input that would otherwise map to 24 to match maxsz.
37
+ */
38
+#define SIMD_MAXSZ_SHIFT 0
39
+#define SIMD_MAXSZ_BITS 8
40
41
-#define SIMD_MAXSZ_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
42
-#define SIMD_MAXSZ_BITS 5
43
+#define SIMD_OPRSZ_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
44
+#define SIMD_OPRSZ_BITS 2
45
46
-#define SIMD_DATA_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
47
+#define SIMD_DATA_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
48
#define SIMD_DATA_BITS (32 - SIMD_DATA_SHIFT)
49
50
/* Create a descriptor from components. */
51
uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data);
52
53
-/* Extract the operation size from a descriptor. */
54
-static inline intptr_t simd_oprsz(uint32_t desc)
55
-{
56
- return (extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS) + 1) * 8;
57
-}
58
-
59
/* Extract the max vector size from a descriptor. */
60
static inline intptr_t simd_maxsz(uint32_t desc)
61
{
62
- return (extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) + 1) * 8;
63
+ return extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) * 8 + 8;
64
+}
65
+
66
+/* Extract the operation size from a descriptor. */
67
+static inline intptr_t simd_oprsz(uint32_t desc)
68
+{
69
+ uint32_t f = extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS);
70
+ intptr_t o = f * 8 + 8;
71
+ intptr_t m = simd_maxsz(desc);
72
+ return f == 2 ? m : o;
73
}
74
75
/* Extract the operation-specific data from a descriptor. */
76
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/tcg/tcg-op-gvec.c
79
+++ b/tcg/tcg-op-gvec.c
80
@@ -XXX,XX +XXX,XX @@ static const TCGOpcode vecop_list_empty[1] = { 0 };
81
of the operand offsets so that we can check them all at once. */
82
static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
83
{
84
- uint32_t opr_align = oprsz >= 16 ? 15 : 7;
85
- uint32_t max_align = maxsz >= 16 || oprsz >= 16 ? 15 : 7;
86
- tcg_debug_assert(oprsz > 0);
87
- tcg_debug_assert(oprsz <= maxsz);
88
- tcg_debug_assert((oprsz & opr_align) == 0);
89
+ uint32_t max_align;
90
+
91
+ switch (oprsz) {
92
+ case 8:
93
+ case 16:
94
+ case 32:
95
+ tcg_debug_assert(oprsz <= maxsz);
96
+ break;
97
+ default:
98
+ tcg_debug_assert(oprsz == maxsz);
99
+ break;
100
+ }
101
+ tcg_debug_assert(maxsz <= (8 << SIMD_MAXSZ_BITS));
102
+
103
+ max_align = maxsz >= 16 ? 15 : 7;
104
tcg_debug_assert((maxsz & max_align) == 0);
105
tcg_debug_assert((ofs & max_align) == 0);
106
}
107
@@ -XXX,XX +XXX,XX @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
108
{
109
uint32_t desc = 0;
110
111
- assert(oprsz % 8 == 0 && oprsz <= (8 << SIMD_OPRSZ_BITS));
112
- assert(maxsz % 8 == 0 && maxsz <= (8 << SIMD_MAXSZ_BITS));
113
- assert(data == sextract32(data, 0, SIMD_DATA_BITS));
114
+ check_size_align(oprsz, maxsz, 0);
115
+ tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS));
116
117
oprsz = (oprsz / 8) - 1;
118
maxsz = (maxsz / 8) - 1;
119
+
120
+ /*
121
+ * We have just asserted in check_size_align that either
122
+ * oprsz is {8,16,32} or matches maxsz. Encode the final
123
+ * case with '2', as that would otherwise map to 24.
124
+ */
125
+ if (oprsz == maxsz) {
126
+ oprsz = 2;
127
+ }
128
+
129
desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
130
desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
131
desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);
132
--
133
2.25.1
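For illustration, a minimal standalone sketch of the encode/decode round trip described in the commit message above. It is not the QEMU implementation: field(), encode(), decode_oprsz() and decode_maxsz() are local stand-ins for deposit32/extract32 and for simd_desc()/simd_oprsz()/simd_maxsz(), and the operation-specific data field is omitted.

/*
 * Sketch of the new oprsz/maxsz descriptor encoding (illustration only).
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAXSZ_SHIFT 0
#define MAXSZ_BITS  8
#define OPRSZ_SHIFT (MAXSZ_SHIFT + MAXSZ_BITS)
#define OPRSZ_BITS  2

static uint32_t field(uint32_t desc, int shift, int bits)
{
    return (desc >> shift) & ((1u << bits) - 1);
}

static uint32_t encode(uint32_t oprsz, uint32_t maxsz)
{
    uint32_t o = oprsz / 8 - 1;
    uint32_t m = maxsz / 8 - 1;

    /* oprsz is 8, 16 or 32 (fields 0, 1, 3), or equal to maxsz (field 2). */
    if (o == m) {
        o = 2;
    }
    return (m << MAXSZ_SHIFT) | (o << OPRSZ_SHIFT);
}

static uint32_t decode_maxsz(uint32_t desc)
{
    return field(desc, MAXSZ_SHIFT, MAXSZ_BITS) * 8 + 8;
}

static uint32_t decode_oprsz(uint32_t desc)
{
    uint32_t f = field(desc, OPRSZ_SHIFT, OPRSZ_BITS);
    return f == 2 ? decode_maxsz(desc) : f * 8 + 8;
}

int main(void)
{
    /* A mismatch is only representable for oprsz of 8, 16 or 32 bytes... */
    assert(decode_oprsz(encode(16, 256)) == 16);
    assert(decode_maxsz(encode(16, 256)) == 256);
    /* ...while oprsz == maxsz now reaches the full 2048-byte range. */
    assert(decode_oprsz(encode(2048, 2048)) == 2048);
    printf("descriptor round-trips ok\n");
    return 0;
}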
New patch
The union is unused; let "regs" appear in the main structure
without the "u.regs" wrapping.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h            |  4 +---
 tcg/tcg.c                    | 22 +++++++++++-----------
 tcg/aarch64/tcg-target.c.inc | 14 +++++++-------
 tcg/arm/tcg-target.c.inc     | 26 +++++++++++++-------------
 tcg/i386/tcg-target.c.inc    | 26 +++++++++++++-------------
 tcg/mips/tcg-target.c.inc    | 18 +++++++++---------
 tcg/ppc/tcg-target.c.inc     | 24 ++++++++++++------------
 tcg/riscv/tcg-target.c.inc   | 14 +++++++-------
 tcg/s390/tcg-target.c.inc    | 18 +++++++++---------
 tcg/sparc/tcg-target.c.inc   | 16 ++++++++--------
 tcg/tci/tcg-target.c.inc     |  2 +-
 11 files changed, 91 insertions(+), 93 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/tcg/tcg.h
23
+++ b/include/tcg/tcg.h
24
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
25
typedef struct TCGArgConstraint {
26
uint16_t ct;
27
uint8_t alias_index;
28
- union {
29
- TCGRegSet regs;
30
- } u;
31
+ TCGRegSet regs;
32
} TCGArgConstraint;
33
34
#define TCG_MAX_OP_ARGS 16
35
diff --git a/tcg/tcg.c b/tcg/tcg.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/tcg/tcg.c
38
+++ b/tcg/tcg.c
39
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
40
return 0;
41
n = 0;
42
for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
43
- if (tcg_regset_test_reg(arg_ct->u.regs, i))
44
+ if (tcg_regset_test_reg(arg_ct->regs, i))
45
n++;
46
}
47
}
48
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
49
/* Incomplete TCGTargetOpDef entry. */
50
tcg_debug_assert(ct_str != NULL);
51
52
- def->args_ct[i].u.regs = 0;
53
+ def->args_ct[i].regs = 0;
54
def->args_ct[i].ct = 0;
55
while (*ct_str != '\0') {
56
switch(*ct_str) {
57
@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
58
pset = la_temp_pref(ts);
59
set = *pset;
60
61
- set &= ct->u.regs;
62
+ set &= ct->regs;
63
if (ct->ct & TCG_CT_IALIAS) {
64
set &= op->output_pref[ct->alias_index];
65
}
66
/* If the combination is not possible, restart. */
67
if (set == 0) {
68
- set = ct->u.regs;
69
+ set = ct->regs;
70
}
71
*pset = set;
72
}
73
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
74
return;
75
}
76
77
- dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
78
- dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
79
+ dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
80
+ dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
81
82
/* Allocate the output register now. */
83
if (ots->val_type != TEMP_VAL_REG) {
84
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
85
}
86
}
87
88
- temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
89
+ temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
90
reg = ts->reg;
91
92
- if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
93
+ if (tcg_regset_test_reg(arg_ct->regs, reg)) {
94
/* nothing to do : the constraint is satisfied */
95
} else {
96
allocate_in_reg:
97
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
98
and move the temporary register into it */
99
temp_load(s, ts, tcg_target_available_regs[ts->type],
100
i_allocated_regs, 0);
101
- reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
102
+ reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
103
o_preferred_regs, ts->indirect_base);
104
if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
105
/*
106
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
107
&& !const_args[arg_ct->alias_index]) {
108
reg = new_args[arg_ct->alias_index];
109
} else if (arg_ct->ct & TCG_CT_NEWREG) {
110
- reg = tcg_reg_alloc(s, arg_ct->u.regs,
111
+ reg = tcg_reg_alloc(s, arg_ct->regs,
112
i_allocated_regs | o_allocated_regs,
113
op->output_pref[k], ts->indirect_base);
114
} else {
115
- reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
116
+ reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
117
op->output_pref[k], ts->indirect_base);
118
}
119
tcg_regset_set_reg(o_allocated_regs, reg);
120
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
121
index XXXXXXX..XXXXXXX 100644
122
--- a/tcg/aarch64/tcg-target.c.inc
123
+++ b/tcg/aarch64/tcg-target.c.inc
124
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
125
switch (*ct_str++) {
126
case 'r': /* general registers */
127
ct->ct |= TCG_CT_REG;
128
- ct->u.regs |= 0xffffffffu;
129
+ ct->regs |= 0xffffffffu;
130
break;
131
case 'w': /* advsimd registers */
132
ct->ct |= TCG_CT_REG;
133
- ct->u.regs |= 0xffffffff00000000ull;
134
+ ct->regs |= 0xffffffff00000000ull;
135
break;
136
case 'l': /* qemu_ld / qemu_st address, data_reg */
137
ct->ct |= TCG_CT_REG;
138
- ct->u.regs = 0xffffffffu;
139
+ ct->regs = 0xffffffffu;
140
#ifdef CONFIG_SOFTMMU
141
/* x0 and x1 will be overwritten when reading the tlb entry,
142
and x2, and x3 for helper args, better to avoid using them. */
143
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
144
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
145
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
146
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
147
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X0);
148
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X1);
149
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X2);
150
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X3);
151
#endif
152
break;
153
case 'A': /* Valid for arithmetic immediate (positive or negative). */
154
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
155
index XXXXXXX..XXXXXXX 100644
156
--- a/tcg/arm/tcg-target.c.inc
157
+++ b/tcg/arm/tcg-target.c.inc
158
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
159
160
case 'r':
161
ct->ct |= TCG_CT_REG;
162
- ct->u.regs = 0xffff;
163
+ ct->regs = 0xffff;
164
break;
165
166
/* qemu_ld address */
167
case 'l':
168
ct->ct |= TCG_CT_REG;
169
- ct->u.regs = 0xffff;
170
+ ct->regs = 0xffff;
171
#ifdef CONFIG_SOFTMMU
172
/* r0-r2,lr will be overwritten when reading the tlb entry,
173
so don't use these. */
174
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
175
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
176
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
177
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
178
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
179
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
180
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
181
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
182
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
183
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
184
#endif
185
break;
186
187
/* qemu_st address & data */
188
case 's':
189
ct->ct |= TCG_CT_REG;
190
- ct->u.regs = 0xffff;
191
+ ct->regs = 0xffff;
192
/* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
193
and r0-r1 doing the byte swapping, so don't use these. */
194
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
195
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
196
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
197
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
198
#if defined(CONFIG_SOFTMMU)
199
/* Avoid clashes with registers being used for helper args */
200
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
201
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
202
#if TARGET_LONG_BITS == 64
203
/* Avoid clashes with registers being used for helper args */
204
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
205
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
206
#endif
207
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
208
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
209
#endif
210
break;
211
212
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
213
index XXXXXXX..XXXXXXX 100644
214
--- a/tcg/i386/tcg-target.c.inc
215
+++ b/tcg/i386/tcg-target.c.inc
216
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
217
switch(*ct_str++) {
218
case 'a':
219
ct->ct |= TCG_CT_REG;
220
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
221
+ tcg_regset_set_reg(ct->regs, TCG_REG_EAX);
222
break;
223
case 'b':
224
ct->ct |= TCG_CT_REG;
225
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
226
+ tcg_regset_set_reg(ct->regs, TCG_REG_EBX);
227
break;
228
case 'c':
229
ct->ct |= TCG_CT_REG;
230
- tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
231
+ tcg_regset_set_reg(ct->regs, TCG_REG_ECX);
232
break;
233
case 'd':
234
ct->ct |= TCG_CT_REG;
235
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
236
+ tcg_regset_set_reg(ct->regs, TCG_REG_EDX);
237
break;
238
case 'S':
239
ct->ct |= TCG_CT_REG;
240
- tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
241
+ tcg_regset_set_reg(ct->regs, TCG_REG_ESI);
242
break;
243
case 'D':
244
ct->ct |= TCG_CT_REG;
245
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
246
+ tcg_regset_set_reg(ct->regs, TCG_REG_EDI);
247
break;
248
case 'q':
249
/* A register that can be used as a byte operand. */
250
ct->ct |= TCG_CT_REG;
251
- ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
252
+ ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
253
break;
254
case 'Q':
255
/* A register with an addressable second byte (e.g. %ah). */
256
ct->ct |= TCG_CT_REG;
257
- ct->u.regs = 0xf;
258
+ ct->regs = 0xf;
259
break;
260
case 'r':
261
/* A general register. */
262
ct->ct |= TCG_CT_REG;
263
- ct->u.regs |= ALL_GENERAL_REGS;
264
+ ct->regs |= ALL_GENERAL_REGS;
265
break;
266
case 'W':
267
/* With TZCNT/LZCNT, we can have operand-size as an input. */
268
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
269
case 'x':
270
/* A vector register. */
271
ct->ct |= TCG_CT_REG;
272
- ct->u.regs |= ALL_VECTOR_REGS;
273
+ ct->regs |= ALL_VECTOR_REGS;
274
break;
275
276
/* qemu_ld/st address constraint */
277
case 'L':
278
ct->ct |= TCG_CT_REG;
279
- ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
280
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
281
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
282
+ ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
283
+ tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
284
+ tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
285
break;
286
287
case 'e':
288
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
289
index XXXXXXX..XXXXXXX 100644
290
--- a/tcg/mips/tcg-target.c.inc
291
+++ b/tcg/mips/tcg-target.c.inc
292
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
293
switch(*ct_str++) {
294
case 'r':
295
ct->ct |= TCG_CT_REG;
296
- ct->u.regs = 0xffffffff;
297
+ ct->regs = 0xffffffff;
298
break;
299
case 'L': /* qemu_ld input arg constraint */
300
ct->ct |= TCG_CT_REG;
301
- ct->u.regs = 0xffffffff;
302
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
303
+ ct->regs = 0xffffffff;
304
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
305
#if defined(CONFIG_SOFTMMU)
306
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
307
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
308
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
309
}
310
#endif
311
break;
312
case 'S': /* qemu_st constraint */
313
ct->ct |= TCG_CT_REG;
314
- ct->u.regs = 0xffffffff;
315
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
316
+ ct->regs = 0xffffffff;
317
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
318
#if defined(CONFIG_SOFTMMU)
319
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
320
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
321
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
322
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
323
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A3);
324
} else {
325
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
326
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A1);
327
}
328
#endif
329
break;
330
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
331
index XXXXXXX..XXXXXXX 100644
332
--- a/tcg/ppc/tcg-target.c.inc
333
+++ b/tcg/ppc/tcg-target.c.inc
334
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
335
switch (*ct_str++) {
336
case 'A': case 'B': case 'C': case 'D':
337
ct->ct |= TCG_CT_REG;
338
- tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
339
+ tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
340
break;
341
case 'r':
342
ct->ct |= TCG_CT_REG;
343
- ct->u.regs = 0xffffffff;
344
+ ct->regs = 0xffffffff;
345
break;
346
case 'v':
347
ct->ct |= TCG_CT_REG;
348
- ct->u.regs = 0xffffffff00000000ull;
349
+ ct->regs = 0xffffffff00000000ull;
350
break;
351
case 'L': /* qemu_ld constraint */
352
ct->ct |= TCG_CT_REG;
353
- ct->u.regs = 0xffffffff;
354
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
355
+ ct->regs = 0xffffffff;
356
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
357
#ifdef CONFIG_SOFTMMU
358
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
359
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
360
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
361
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
362
#endif
363
break;
364
case 'S': /* qemu_st constraint */
365
ct->ct |= TCG_CT_REG;
366
- ct->u.regs = 0xffffffff;
367
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
368
+ ct->regs = 0xffffffff;
369
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
370
#ifdef CONFIG_SOFTMMU
371
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
372
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
373
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
374
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
375
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
376
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R6);
377
#endif
378
break;
379
case 'I':
380
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
381
index XXXXXXX..XXXXXXX 100644
382
--- a/tcg/riscv/tcg-target.c.inc
383
+++ b/tcg/riscv/tcg-target.c.inc
384
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
385
switch (*ct_str++) {
386
case 'r':
387
ct->ct |= TCG_CT_REG;
388
- ct->u.regs = 0xffffffff;
389
+ ct->regs = 0xffffffff;
390
break;
391
case 'L':
392
/* qemu_ld/qemu_st constraint */
393
ct->ct |= TCG_CT_REG;
394
- ct->u.regs = 0xffffffff;
395
+ ct->regs = 0xffffffff;
396
/* qemu_ld/qemu_st uses TCG_REG_TMP0 */
397
#if defined(CONFIG_SOFTMMU)
398
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
399
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
400
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
401
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[3]);
402
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[4]);
403
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[0]);
404
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[1]);
405
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[2]);
406
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[3]);
407
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[4]);
408
#endif
409
break;
410
case 'I':
411
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
412
index XXXXXXX..XXXXXXX 100644
413
--- a/tcg/s390/tcg-target.c.inc
414
+++ b/tcg/s390/tcg-target.c.inc
415
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
416
switch (*ct_str++) {
417
case 'r': /* all registers */
418
ct->ct |= TCG_CT_REG;
419
- ct->u.regs = 0xffff;
420
+ ct->regs = 0xffff;
421
break;
422
case 'L': /* qemu_ld/st constraint */
423
ct->ct |= TCG_CT_REG;
424
- ct->u.regs = 0xffff;
425
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
426
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
427
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
428
+ ct->regs = 0xffff;
429
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
430
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
431
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
432
break;
433
case 'a': /* force R2 for division */
434
ct->ct |= TCG_CT_REG;
435
- ct->u.regs = 0;
436
- tcg_regset_set_reg(ct->u.regs, TCG_REG_R2);
437
+ ct->regs = 0;
438
+ tcg_regset_set_reg(ct->regs, TCG_REG_R2);
439
break;
440
case 'b': /* force R3 for division */
441
ct->ct |= TCG_CT_REG;
442
- ct->u.regs = 0;
443
- tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
444
+ ct->regs = 0;
445
+ tcg_regset_set_reg(ct->regs, TCG_REG_R3);
446
break;
447
case 'A':
448
ct->ct |= TCG_CT_CONST_S33;
449
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
450
index XXXXXXX..XXXXXXX 100644
451
--- a/tcg/sparc/tcg-target.c.inc
452
+++ b/tcg/sparc/tcg-target.c.inc
453
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
454
switch (*ct_str++) {
455
case 'r':
456
ct->ct |= TCG_CT_REG;
457
- ct->u.regs = 0xffffffff;
458
+ ct->regs = 0xffffffff;
459
break;
460
case 'R':
461
ct->ct |= TCG_CT_REG;
462
- ct->u.regs = ALL_64;
463
+ ct->regs = ALL_64;
464
break;
465
case 'A': /* qemu_ld/st address constraint */
466
ct->ct |= TCG_CT_REG;
467
- ct->u.regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
468
+ ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
469
reserve_helpers:
470
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
471
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
472
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
473
+ tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
474
+ tcg_regset_reset_reg(ct->regs, TCG_REG_O1);
475
+ tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
476
break;
477
case 's': /* qemu_st data 32-bit constraint */
478
ct->ct |= TCG_CT_REG;
479
- ct->u.regs = 0xffffffff;
480
+ ct->regs = 0xffffffff;
481
goto reserve_helpers;
482
case 'S': /* qemu_st data 64-bit constraint */
483
ct->ct |= TCG_CT_REG;
484
- ct->u.regs = ALL_64;
485
+ ct->regs = ALL_64;
486
goto reserve_helpers;
487
case 'I':
488
ct->ct |= TCG_CT_CONST_S11;
489
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
490
index XXXXXXX..XXXXXXX 100644
491
--- a/tcg/tci/tcg-target.c.inc
492
+++ b/tcg/tci/tcg-target.c.inc
493
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
494
case 'L': /* qemu_ld constraint */
495
case 'S': /* qemu_st constraint */
496
ct->ct |= TCG_CT_REG;
497
- ct->u.regs = BIT(TCG_TARGET_NB_REGS) - 1;
498
+ ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
499
break;
500
default:
501
return NULL;
502
--
503
2.25.1
New patch
This uses an existing hole in the TCGArgConstraint structure
and will be convenient for keeping the data in one place.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |  2 +-
 tcg/tcg.c         | 35 +++++++++++++++++------------------
 2 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg.h
13
+++ b/include/tcg/tcg.h
14
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
15
typedef struct TCGArgConstraint {
16
uint16_t ct;
17
uint8_t alias_index;
18
+ uint8_t sort_index;
19
TCGRegSet regs;
20
} TCGArgConstraint;
21
22
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
23
uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
24
uint8_t flags;
25
TCGArgConstraint *args_ct;
26
- int *sorted_args;
27
#if defined(CONFIG_DEBUG_TCG)
28
int used;
29
#endif
30
diff --git a/tcg/tcg.c b/tcg/tcg.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/tcg/tcg.c
33
+++ b/tcg/tcg.c
34
@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
35
int op, total_args, n, i;
36
TCGOpDef *def;
37
TCGArgConstraint *args_ct;
38
- int *sorted_args;
39
TCGTemp *ts;
40
41
memset(s, 0, sizeof(*s));
42
@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
43
}
44
45
args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
46
- sorted_args = g_malloc(sizeof(int) * total_args);
47
48
for(op = 0; op < NB_OPS; op++) {
49
def = &tcg_op_defs[op];
50
def->args_ct = args_ct;
51
- def->sorted_args = sorted_args;
52
n = def->nb_iargs + def->nb_oargs;
53
- sorted_args += n;
54
args_ct += n;
55
}
56
57
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
58
/* sort from highest priority to lowest */
59
static void sort_constraints(TCGOpDef *def, int start, int n)
60
{
61
- int i, j, p1, p2, tmp;
62
+ int i, j;
63
+ TCGArgConstraint *a = def->args_ct;
64
65
- for(i = 0; i < n; i++)
66
- def->sorted_args[start + i] = start + i;
67
- if (n <= 1)
68
+ for (i = 0; i < n; i++) {
69
+ a[start + i].sort_index = start + i;
70
+ }
71
+ if (n <= 1) {
72
return;
73
- for(i = 0; i < n - 1; i++) {
74
- for(j = i + 1; j < n; j++) {
75
- p1 = get_constraint_priority(def, def->sorted_args[start + i]);
76
- p2 = get_constraint_priority(def, def->sorted_args[start + j]);
77
+ }
78
+ for (i = 0; i < n - 1; i++) {
79
+ for (j = i + 1; j < n; j++) {
80
+ int p1 = get_constraint_priority(def, a[start + i].sort_index);
81
+ int p2 = get_constraint_priority(def, a[start + j].sort_index);
82
if (p1 < p2) {
83
- tmp = def->sorted_args[start + i];
84
- def->sorted_args[start + i] = def->sorted_args[start + j];
85
- def->sorted_args[start + j] = tmp;
86
+ int tmp = a[start + i].sort_index;
87
+ a[start + i].sort_index = a[start + j].sort_index;
88
+ a[start + j].sort_index = tmp;
89
}
90
}
91
}
92
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
93
for (k = 0; k < nb_iargs; k++) {
94
TCGRegSet i_preferred_regs, o_preferred_regs;
95
96
- i = def->sorted_args[nb_oargs + k];
97
+ i = def->args_ct[nb_oargs + k].sort_index;
98
arg = op->args[i];
99
arg_ct = &def->args_ct[i];
100
ts = arg_temp(arg);
101
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
102
int k2, i2;
103
reg = ts->reg;
104
for (k2 = 0 ; k2 < k ; k2++) {
105
- i2 = def->sorted_args[nb_oargs + k2];
106
+ i2 = def->args_ct[nb_oargs + k2].sort_index;
107
if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
108
reg == new_args[i2]) {
109
goto allocate_in_reg;
110
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
111
112
/* satisfy the output constraints */
113
for(k = 0; k < nb_oargs; k++) {
114
- i = def->sorted_args[k];
115
+ i = def->args_ct[k].sort_index;
116
arg = op->args[i];
117
arg_ct = &def->args_ct[i];
118
ts = arg_temp(arg);
119
--
120
2.25.1
New patch
This wasn't actually used for anything, really.  All variable
operands must accept registers, which are indicated by the
set in TCGArgConstraint.regs.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h            |  1 -
 tcg/tcg.c                    | 15 ++++-----------
 tcg/aarch64/tcg-target.c.inc |  3 ---
 tcg/arm/tcg-target.c.inc     |  3 ---
 tcg/i386/tcg-target.c.inc    | 11 -----------
 tcg/mips/tcg-target.c.inc    |  3 ---
 tcg/ppc/tcg-target.c.inc     |  5 -----
 tcg/riscv/tcg-target.c.inc   |  2 --
 tcg/s390/tcg-target.c.inc    |  4 ----
 tcg/sparc/tcg-target.c.inc   |  5 -----
 tcg/tci/tcg-target.c.inc     |  1 -
 11 files changed, 4 insertions(+), 49 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/tcg/tcg.h
23
+++ b/include/tcg/tcg.h
24
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
25
#define TCG_CT_ALIAS 0x80
26
#define TCG_CT_IALIAS 0x40
27
#define TCG_CT_NEWREG 0x20 /* output requires a new register */
28
-#define TCG_CT_REG 0x01
29
#define TCG_CT_CONST 0x02 /* any constant of register size */
30
31
typedef struct TCGArgConstraint {
32
diff --git a/tcg/tcg.c b/tcg/tcg.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/tcg.c
35
+++ b/tcg/tcg.c
36
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
37
/* we give more priority to constraints with less registers */
38
static int get_constraint_priority(const TCGOpDef *def, int k)
39
{
40
- const TCGArgConstraint *arg_ct;
41
+ const TCGArgConstraint *arg_ct = &def->args_ct[k];
42
+ int n;
43
44
- int i, n;
45
- arg_ct = &def->args_ct[k];
46
if (arg_ct->ct & TCG_CT_ALIAS) {
47
/* an alias is equivalent to a single register */
48
n = 1;
49
} else {
50
- if (!(arg_ct->ct & TCG_CT_REG))
51
- return 0;
52
- n = 0;
53
- for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
54
- if (tcg_regset_test_reg(arg_ct->regs, i))
55
- n++;
56
- }
57
+ n = ctpop64(arg_ct->regs);
58
}
59
return TCG_TARGET_NB_REGS - n + 1;
60
}
61
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
62
int oarg = *ct_str - '0';
63
tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
64
tcg_debug_assert(oarg < def->nb_oargs);
65
- tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
66
+ tcg_debug_assert(def->args_ct[oarg].regs != 0);
67
/* TCG_CT_ALIAS is for the output arguments.
68
The input is tagged with TCG_CT_IALIAS. */
69
def->args_ct[i] = def->args_ct[oarg];
70
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
71
index XXXXXXX..XXXXXXX 100644
72
--- a/tcg/aarch64/tcg-target.c.inc
73
+++ b/tcg/aarch64/tcg-target.c.inc
74
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
75
{
76
switch (*ct_str++) {
77
case 'r': /* general registers */
78
- ct->ct |= TCG_CT_REG;
79
ct->regs |= 0xffffffffu;
80
break;
81
case 'w': /* advsimd registers */
82
- ct->ct |= TCG_CT_REG;
83
ct->regs |= 0xffffffff00000000ull;
84
break;
85
case 'l': /* qemu_ld / qemu_st address, data_reg */
86
- ct->ct |= TCG_CT_REG;
87
ct->regs = 0xffffffffu;
88
#ifdef CONFIG_SOFTMMU
89
/* x0 and x1 will be overwritten when reading the tlb entry,
90
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
91
index XXXXXXX..XXXXXXX 100644
92
--- a/tcg/arm/tcg-target.c.inc
93
+++ b/tcg/arm/tcg-target.c.inc
94
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
95
break;
96
97
case 'r':
98
- ct->ct |= TCG_CT_REG;
99
ct->regs = 0xffff;
100
break;
101
102
/* qemu_ld address */
103
case 'l':
104
- ct->ct |= TCG_CT_REG;
105
ct->regs = 0xffff;
106
#ifdef CONFIG_SOFTMMU
107
/* r0-r2,lr will be overwritten when reading the tlb entry,
108
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
109
110
/* qemu_st address & data */
111
case 's':
112
- ct->ct |= TCG_CT_REG;
113
ct->regs = 0xffff;
114
/* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
115
and r0-r1 doing the byte swapping, so don't use these. */
116
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
117
index XXXXXXX..XXXXXXX 100644
118
--- a/tcg/i386/tcg-target.c.inc
119
+++ b/tcg/i386/tcg-target.c.inc
120
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
121
{
122
switch(*ct_str++) {
123
case 'a':
124
- ct->ct |= TCG_CT_REG;
125
tcg_regset_set_reg(ct->regs, TCG_REG_EAX);
126
break;
127
case 'b':
128
- ct->ct |= TCG_CT_REG;
129
tcg_regset_set_reg(ct->regs, TCG_REG_EBX);
130
break;
131
case 'c':
132
- ct->ct |= TCG_CT_REG;
133
tcg_regset_set_reg(ct->regs, TCG_REG_ECX);
134
break;
135
case 'd':
136
- ct->ct |= TCG_CT_REG;
137
tcg_regset_set_reg(ct->regs, TCG_REG_EDX);
138
break;
139
case 'S':
140
- ct->ct |= TCG_CT_REG;
141
tcg_regset_set_reg(ct->regs, TCG_REG_ESI);
142
break;
143
case 'D':
144
- ct->ct |= TCG_CT_REG;
145
tcg_regset_set_reg(ct->regs, TCG_REG_EDI);
146
break;
147
case 'q':
148
/* A register that can be used as a byte operand. */
149
- ct->ct |= TCG_CT_REG;
150
ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
151
break;
152
case 'Q':
153
/* A register with an addressable second byte (e.g. %ah). */
154
- ct->ct |= TCG_CT_REG;
155
ct->regs = 0xf;
156
break;
157
case 'r':
158
/* A general register. */
159
- ct->ct |= TCG_CT_REG;
160
ct->regs |= ALL_GENERAL_REGS;
161
break;
162
case 'W':
163
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
164
break;
165
case 'x':
166
/* A vector register. */
167
- ct->ct |= TCG_CT_REG;
168
ct->regs |= ALL_VECTOR_REGS;
169
break;
170
171
/* qemu_ld/st address constraint */
172
case 'L':
173
- ct->ct |= TCG_CT_REG;
174
ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
175
tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
176
tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
177
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
178
index XXXXXXX..XXXXXXX 100644
179
--- a/tcg/mips/tcg-target.c.inc
180
+++ b/tcg/mips/tcg-target.c.inc
181
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
182
{
183
switch(*ct_str++) {
184
case 'r':
185
- ct->ct |= TCG_CT_REG;
186
ct->regs = 0xffffffff;
187
break;
188
case 'L': /* qemu_ld input arg constraint */
189
- ct->ct |= TCG_CT_REG;
190
ct->regs = 0xffffffff;
191
tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
192
#if defined(CONFIG_SOFTMMU)
193
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
194
#endif
195
break;
196
case 'S': /* qemu_st constraint */
197
- ct->ct |= TCG_CT_REG;
198
ct->regs = 0xffffffff;
199
tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
200
#if defined(CONFIG_SOFTMMU)
201
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
202
index XXXXXXX..XXXXXXX 100644
203
--- a/tcg/ppc/tcg-target.c.inc
204
+++ b/tcg/ppc/tcg-target.c.inc
205
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
206
{
207
switch (*ct_str++) {
208
case 'A': case 'B': case 'C': case 'D':
209
- ct->ct |= TCG_CT_REG;
210
tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
211
break;
212
case 'r':
213
- ct->ct |= TCG_CT_REG;
214
ct->regs = 0xffffffff;
215
break;
216
case 'v':
217
- ct->ct |= TCG_CT_REG;
218
ct->regs = 0xffffffff00000000ull;
219
break;
220
case 'L': /* qemu_ld constraint */
221
- ct->ct |= TCG_CT_REG;
222
ct->regs = 0xffffffff;
223
tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
224
#ifdef CONFIG_SOFTMMU
225
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
226
#endif
227
break;
228
case 'S': /* qemu_st constraint */
229
- ct->ct |= TCG_CT_REG;
230
ct->regs = 0xffffffff;
231
tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
232
#ifdef CONFIG_SOFTMMU
233
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
234
index XXXXXXX..XXXXXXX 100644
235
--- a/tcg/riscv/tcg-target.c.inc
236
+++ b/tcg/riscv/tcg-target.c.inc
237
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
238
{
239
switch (*ct_str++) {
240
case 'r':
241
- ct->ct |= TCG_CT_REG;
242
ct->regs = 0xffffffff;
243
break;
244
case 'L':
245
/* qemu_ld/qemu_st constraint */
246
- ct->ct |= TCG_CT_REG;
247
ct->regs = 0xffffffff;
248
/* qemu_ld/qemu_st uses TCG_REG_TMP0 */
249
#if defined(CONFIG_SOFTMMU)
250
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
251
index XXXXXXX..XXXXXXX 100644
252
--- a/tcg/s390/tcg-target.c.inc
253
+++ b/tcg/s390/tcg-target.c.inc
254
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
255
{
256
switch (*ct_str++) {
257
case 'r': /* all registers */
258
- ct->ct |= TCG_CT_REG;
259
ct->regs = 0xffff;
260
break;
261
case 'L': /* qemu_ld/st constraint */
262
- ct->ct |= TCG_CT_REG;
263
ct->regs = 0xffff;
264
tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
265
tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
266
tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
267
break;
268
case 'a': /* force R2 for division */
269
- ct->ct |= TCG_CT_REG;
270
ct->regs = 0;
271
tcg_regset_set_reg(ct->regs, TCG_REG_R2);
272
break;
273
case 'b': /* force R3 for division */
274
- ct->ct |= TCG_CT_REG;
275
ct->regs = 0;
276
tcg_regset_set_reg(ct->regs, TCG_REG_R3);
277
break;
278
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
279
index XXXXXXX..XXXXXXX 100644
280
--- a/tcg/sparc/tcg-target.c.inc
281
+++ b/tcg/sparc/tcg-target.c.inc
282
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
283
{
284
switch (*ct_str++) {
285
case 'r':
286
- ct->ct |= TCG_CT_REG;
287
ct->regs = 0xffffffff;
288
break;
289
case 'R':
290
- ct->ct |= TCG_CT_REG;
291
ct->regs = ALL_64;
292
break;
293
case 'A': /* qemu_ld/st address constraint */
294
- ct->ct |= TCG_CT_REG;
295
ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
296
reserve_helpers:
297
tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
298
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
299
tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
300
break;
301
case 's': /* qemu_st data 32-bit constraint */
302
- ct->ct |= TCG_CT_REG;
303
ct->regs = 0xffffffff;
304
goto reserve_helpers;
305
case 'S': /* qemu_st data 64-bit constraint */
306
- ct->ct |= TCG_CT_REG;
307
ct->regs = ALL_64;
308
goto reserve_helpers;
309
case 'I':
310
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
311
index XXXXXXX..XXXXXXX 100644
312
--- a/tcg/tci/tcg-target.c.inc
313
+++ b/tcg/tci/tcg-target.c.inc
314
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
315
case 'r':
316
case 'L': /* qemu_ld constraint */
317
case 'S': /* qemu_st constraint */
318
- ct->ct |= TCG_CT_REG;
319
ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
320
break;
321
default:
322
--
323
2.25.1
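A small self-contained check of the simplification above: the per-register loop that get_constraint_priority() used to run is just a population count. ctpop64() here is a local helper standing in for QEMU's, and the 0xffff register set is only an example value.

/*
 * Sketch only: the removed register-counting loop is equivalent to a
 * population count of the constraint's register set.
 */
#include <assert.h>
#include <stdint.h>

static int ctpop64(uint64_t x)
{
    int n = 0;
    while (x) {
        x &= x - 1;     /* clear the lowest set bit */
        n++;
    }
    return n;
}

int main(void)
{
    uint64_t regs = 0xffff;          /* e.g. "any of 16 registers" */
    int n = 0;

    for (int i = 0; i < 64; i++) {   /* the old per-register loop */
        if (regs & (UINT64_C(1) << i)) {
            n++;
        }
    }
    assert(n == ctpop64(regs) && n == 16);
    return 0;
}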
New patch
These are easier to set and test when they have their own fields.
Reduce the size of alias_index and sort_index to 4 bits, which is
sufficient for TCG_MAX_OP_ARGS.  This leaves only the bits indicating
constants within the ct field.

Move all initialization to allocation time, rather than initializing
individual fields in process_op_defs.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 14 +++++++-------
 tcg/tcg.c         | 28 ++++++++++++----------------
 2 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg.h
18
+++ b/include/tcg/tcg.h
19
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void);
20
void tcg_dump_info(void);
21
void tcg_dump_op_count(void);
22
23
-#define TCG_CT_ALIAS 0x80
24
-#define TCG_CT_IALIAS 0x40
25
-#define TCG_CT_NEWREG 0x20 /* output requires a new register */
26
-#define TCG_CT_CONST 0x02 /* any constant of register size */
27
+#define TCG_CT_CONST 1 /* any constant of register size */
28
29
typedef struct TCGArgConstraint {
30
- uint16_t ct;
31
- uint8_t alias_index;
32
- uint8_t sort_index;
33
+ unsigned ct : 16;
34
+ unsigned alias_index : 4;
35
+ unsigned sort_index : 4;
36
+ bool oalias : 1;
37
+ bool ialias : 1;
38
+ bool newreg : 1;
39
TCGRegSet regs;
40
} TCGArgConstraint;
41
42
diff --git a/tcg/tcg.c b/tcg/tcg.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/tcg.c
45
+++ b/tcg/tcg.c
46
@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
47
total_args += n;
48
}
49
50
- args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
51
+ args_ct = g_new0(TCGArgConstraint, total_args);
52
53
for(op = 0; op < NB_OPS; op++) {
54
def = &tcg_op_defs[op];
55
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
56
const TCGArgConstraint *arg_ct = &def->args_ct[k];
57
int n;
58
59
- if (arg_ct->ct & TCG_CT_ALIAS) {
60
+ if (arg_ct->oalias) {
61
/* an alias is equivalent to a single register */
62
n = 1;
63
} else {
64
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
65
/* Incomplete TCGTargetOpDef entry. */
66
tcg_debug_assert(ct_str != NULL);
67
68
- def->args_ct[i].regs = 0;
69
- def->args_ct[i].ct = 0;
70
while (*ct_str != '\0') {
71
switch(*ct_str) {
72
case '0' ... '9':
73
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
74
tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
75
tcg_debug_assert(oarg < def->nb_oargs);
76
tcg_debug_assert(def->args_ct[oarg].regs != 0);
77
- /* TCG_CT_ALIAS is for the output arguments.
78
- The input is tagged with TCG_CT_IALIAS. */
79
def->args_ct[i] = def->args_ct[oarg];
80
- def->args_ct[oarg].ct |= TCG_CT_ALIAS;
81
+ /* The output sets oalias. */
82
+ def->args_ct[oarg].oalias = true;
83
def->args_ct[oarg].alias_index = i;
84
- def->args_ct[i].ct |= TCG_CT_IALIAS;
85
+ /* The input sets ialias. */
86
+ def->args_ct[i].ialias = true;
87
def->args_ct[i].alias_index = oarg;
88
}
89
ct_str++;
90
break;
91
case '&':
92
- def->args_ct[i].ct |= TCG_CT_NEWREG;
93
+ def->args_ct[i].newreg = true;
94
ct_str++;
95
break;
96
case 'i':
97
@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
98
set = *pset;
99
100
set &= ct->regs;
101
- if (ct->ct & TCG_CT_IALIAS) {
102
+ if (ct->ialias) {
103
set &= op->output_pref[ct->alias_index];
104
}
105
/* If the combination is not possible, restart. */
106
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
107
}
108
109
i_preferred_regs = o_preferred_regs = 0;
110
- if (arg_ct->ct & TCG_CT_IALIAS) {
111
+ if (arg_ct->ialias) {
112
o_preferred_regs = op->output_pref[arg_ct->alias_index];
113
if (ts->fixed_reg) {
114
/* if fixed register, we must allocate a new register
115
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
116
reg = ts->reg;
117
for (k2 = 0 ; k2 < k ; k2++) {
118
i2 = def->args_ct[nb_oargs + k2].sort_index;
119
- if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
120
- reg == new_args[i2]) {
121
+ if (def->args_ct[i2].ialias && reg == new_args[i2]) {
122
goto allocate_in_reg;
123
}
124
}
125
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
126
/* ENV should not be modified. */
127
tcg_debug_assert(!ts->fixed_reg);
128
129
- if ((arg_ct->ct & TCG_CT_ALIAS)
130
- && !const_args[arg_ct->alias_index]) {
131
+ if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
132
reg = new_args[arg_ct->alias_index];
133
- } else if (arg_ct->ct & TCG_CT_NEWREG) {
134
+ } else if (arg_ct->newreg) {
135
reg = tcg_reg_alloc(s, arg_ct->regs,
136
i_allocated_regs | o_allocated_regs,
137
op->output_pref[k], ts->indirect_base);
138
--
139
2.25.1
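To make the 4-bit choice concrete, here is a hedged sketch using a local mock of the reworked structure; MockArgConstraint is not the real TCGArgConstraint and TCGRegSet is stubbed as uint64_t, but it shows why 4 bits cover every argument index when TCG_MAX_OP_ARGS is 16.

/*
 * Illustration only: a local mock of the reworked constraint structure.
 * The real definitions live in include/tcg/tcg.h.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define TCG_MAX_OP_ARGS 16

typedef uint64_t TCGRegSet;             /* stand-in for the real typedef */

typedef struct {
    unsigned ct : 16;
    unsigned alias_index : 4;           /* 0..15: enough for 16 args */
    unsigned sort_index : 4;
    bool oalias : 1;
    bool ialias : 1;
    bool newreg : 1;
    TCGRegSet regs;
} MockArgConstraint;

int main(void)
{
    MockArgConstraint c = { 0 };

    c.alias_index = TCG_MAX_OP_ARGS - 1;    /* largest possible index */
    c.ialias = true;
    assert(c.alias_index == 15 && c.ialias && !c.oalias);
    return 0;
}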
New patch
The last user of this field disappeared in f69d277ece4.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 3 ---
 1 file changed, 3 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
10
index XXXXXXX..XXXXXXX 100644
11
--- a/include/tcg/tcg.h
12
+++ b/include/tcg/tcg.h
13
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
14
uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
15
uint8_t flags;
16
TCGArgConstraint *args_ct;
17
-#if defined(CONFIG_DEBUG_TCG)
18
- int used;
19
-#endif
20
} TCGOpDef;
21
22
extern TCGOpDef tcg_op_defs[];
23
--
24
2.25.1

The previous change wrongly stated that 32-bit avx2 should have
used VPBROADCASTW.  But that's a 16-bit broadcast and we want a
32-bit broadcast.

Fixes: 7b60ef3264e
Cc: qemu-stable@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
         new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
     } else {
         if (have_avx2) {
-            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret);
+            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
         } else {
             tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
         }
--
2.25.1

The definition of INDEX_op_dupi_vec is that it operates on
units of tcg_target_ulong -- in this case 32 bits.  It does
not work to use this for a uint64_t value that happens to be
small enough to fit in tcg_target_ulong.

Fixes: d2fd745fe8b
Fixes: db432672dc5
Cc: qemu-stable@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-op-vec.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
 
 void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
 {
-    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
-        do_dupi_vec(r, MO_32, a);
-    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
+    if (TCG_TARGET_REG_BITS == 64) {
         do_dupi_vec(r, MO_64, a);
+    } else if (a == dup_const(MO_32, a)) {
+        do_dupi_vec(r, MO_32, a);
     } else {
         TCGv_i64 c = tcg_const_i64(a);
         tcg_gen_dup_i64_vec(MO_64, r, c);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
 
 void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
 {
-    do_dupi_vec(r, MO_REG, dup_const(vece, a));
+    if (vece == MO_64) {
+        tcg_gen_dup64i_vec(r, a);
+    } else {
+        do_dupi_vec(r, MO_REG, dup_const(vece, a));
+    }
 }
 
 void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
--
2.25.1
43
43
intptr_t oprsz = simd_oprsz(desc);
44
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
44
intptr_t i;
45
46
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
47
- *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
48
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
49
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
50
}
51
clear_high(d, oprsz, desc);
52
}
53
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
54
intptr_t oprsz = simd_oprsz(desc);
55
intptr_t i;
56
57
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
58
- *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
59
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
60
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
61
}
62
clear_high(d, oprsz, desc);
63
}
64
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
65
intptr_t oprsz = simd_oprsz(desc);
66
intptr_t i;
67
68
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
69
- *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
70
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
71
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
72
}
73
clear_high(d, oprsz, desc);
74
}
75
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
76
void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
77
{
78
intptr_t oprsz = simd_oprsz(desc);
79
- vec8 vecb = (vec8)DUP16(b);
80
+ uint8_t vecb = (uint8_t)DUP16(b);
81
intptr_t i;
82
83
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
84
- *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
85
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
86
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + vecb;
87
}
88
clear_high(d, oprsz, desc);
89
}
90
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
91
void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
92
{
93
intptr_t oprsz = simd_oprsz(desc);
94
- vec16 vecb = (vec16)DUP8(b);
95
+ uint16_t vecb = (uint16_t)DUP8(b);
96
intptr_t i;
97
98
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
99
- *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
100
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
101
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + vecb;
102
}
103
clear_high(d, oprsz, desc);
104
}
105
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
106
void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
107
{
108
intptr_t oprsz = simd_oprsz(desc);
109
- vec32 vecb = (vec32)DUP4(b);
110
+ uint32_t vecb = (uint32_t)DUP4(b);
111
intptr_t i;
112
113
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
114
- *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
115
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
116
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + vecb;
117
}
118
clear_high(d, oprsz, desc);
119
}
120
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
121
void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
122
{
123
intptr_t oprsz = simd_oprsz(desc);
124
- vec64 vecb = (vec64)DUP2(b);
125
+ uint64_t vecb = (uint64_t)DUP2(b);
126
intptr_t i;
127
128
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
129
- *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
130
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
131
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + vecb;
132
}
133
clear_high(d, oprsz, desc);
134
}
135
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
136
intptr_t oprsz = simd_oprsz(desc);
137
intptr_t i;
138
139
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
140
- *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
141
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
142
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
143
}
144
clear_high(d, oprsz, desc);
145
}
146
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
147
intptr_t oprsz = simd_oprsz(desc);
148
intptr_t i;
149
150
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
151
- *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
152
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
153
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
154
}
155
clear_high(d, oprsz, desc);
156
}
157
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
158
intptr_t oprsz = simd_oprsz(desc);
159
intptr_t i;
160
161
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
162
- *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
163
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
164
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
165
}
166
clear_high(d, oprsz, desc);
167
}
168
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
169
intptr_t oprsz = simd_oprsz(desc);
170
intptr_t i;
171
172
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
173
- *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
174
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
175
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
176
}
177
clear_high(d, oprsz, desc);
178
}
179
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
180
void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
181
{
182
intptr_t oprsz = simd_oprsz(desc);
183
- vec8 vecb = (vec8)DUP16(b);
184
+ uint8_t vecb = (uint8_t)DUP16(b);
185
intptr_t i;
186
187
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
188
- *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
189
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
190
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - vecb;
191
}
192
clear_high(d, oprsz, desc);
193
}
194
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
195
void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
196
{
197
intptr_t oprsz = simd_oprsz(desc);
198
- vec16 vecb = (vec16)DUP8(b);
199
+ uint16_t vecb = (uint16_t)DUP8(b);
200
intptr_t i;
201
202
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
203
- *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
204
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
205
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - vecb;
206
}
207
clear_high(d, oprsz, desc);
208
}
209
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
210
void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
211
{
212
intptr_t oprsz = simd_oprsz(desc);
213
- vec32 vecb = (vec32)DUP4(b);
214
+ uint32_t vecb = (uint32_t)DUP4(b);
215
intptr_t i;
216
217
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
218
- *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
219
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
220
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - vecb;
221
}
222
clear_high(d, oprsz, desc);
223
}
224
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
225
void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
226
{
227
intptr_t oprsz = simd_oprsz(desc);
228
- vec64 vecb = (vec64)DUP2(b);
229
+ uint64_t vecb = (uint64_t)DUP2(b);
230
intptr_t i;
231
232
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
233
- *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
234
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
235
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - vecb;
236
}
237
clear_high(d, oprsz, desc);
238
}
239
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
240
intptr_t oprsz = simd_oprsz(desc);
241
intptr_t i;
242
243
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
244
- *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
245
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
246
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
247
}
248
clear_high(d, oprsz, desc);
249
}
250
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
251
intptr_t oprsz = simd_oprsz(desc);
252
intptr_t i;
253
254
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
255
- *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
256
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
257
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
258
}
259
clear_high(d, oprsz, desc);
260
}
261
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
262
intptr_t oprsz = simd_oprsz(desc);
263
intptr_t i;
264
265
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
266
- *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
267
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
268
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
269
}
270
clear_high(d, oprsz, desc);
271
}
272
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
273
intptr_t oprsz = simd_oprsz(desc);
274
intptr_t i;
275
276
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
277
- *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
278
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
279
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
280
}
281
clear_high(d, oprsz, desc);
282
}
283
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
284
void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
285
{
286
intptr_t oprsz = simd_oprsz(desc);
287
- vec8 vecb = (vec8)DUP16(b);
288
+ uint8_t vecb = (uint8_t)DUP16(b);
289
intptr_t i;
290
291
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
292
- *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
293
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
294
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * vecb;
295
}
296
clear_high(d, oprsz, desc);
297
}
298
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
299
void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
300
{
301
intptr_t oprsz = simd_oprsz(desc);
302
- vec16 vecb = (vec16)DUP8(b);
303
+ uint16_t vecb = (uint16_t)DUP8(b);
304
intptr_t i;
305
306
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
307
- *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
308
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
309
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * vecb;
310
}
311
clear_high(d, oprsz, desc);
312
}
313
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
314
void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
315
{
316
intptr_t oprsz = simd_oprsz(desc);
317
- vec32 vecb = (vec32)DUP4(b);
318
+ uint32_t vecb = (uint32_t)DUP4(b);
319
intptr_t i;
320
321
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
322
- *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
323
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
324
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * vecb;
325
}
326
clear_high(d, oprsz, desc);
327
}
328
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
329
void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
330
{
331
intptr_t oprsz = simd_oprsz(desc);
332
- vec64 vecb = (vec64)DUP2(b);
333
+ uint64_t vecb = (uint64_t)DUP2(b);
334
intptr_t i;
335
336
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
337
- *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
338
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
339
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * vecb;
340
}
341
clear_high(d, oprsz, desc);
342
}
343
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
344
intptr_t oprsz = simd_oprsz(desc);
345
intptr_t i;
346
347
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
348
- *(vec8 *)(d + i) = -*(vec8 *)(a + i);
349
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
350
+ *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
351
}
352
clear_high(d, oprsz, desc);
353
}
354
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
355
intptr_t oprsz = simd_oprsz(desc);
356
intptr_t i;
357
358
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
359
- *(vec16 *)(d + i) = -*(vec16 *)(a + i);
360
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
361
+ *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
362
}
363
clear_high(d, oprsz, desc);
364
}
365
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
366
intptr_t oprsz = simd_oprsz(desc);
367
intptr_t i;
368
369
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
370
- *(vec32 *)(d + i) = -*(vec32 *)(a + i);
371
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
372
+ *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
373
}
374
clear_high(d, oprsz, desc);
375
}
376
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
377
intptr_t oprsz = simd_oprsz(desc);
378
intptr_t i;
379
380
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
381
- *(vec64 *)(d + i) = -*(vec64 *)(a + i);
382
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
383
+ *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
384
}
385
clear_high(d, oprsz, desc);
386
}
387
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
388
intptr_t oprsz = simd_oprsz(desc);
389
intptr_t i;
390
391
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
392
- *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
393
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
394
+ *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
395
}
396
clear_high(d, oprsz, desc);
397
}
398
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
399
intptr_t oprsz = simd_oprsz(desc);
400
intptr_t i;
401
402
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
403
- *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
404
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
405
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
406
}
407
clear_high(d, oprsz, desc);
408
}
409
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
410
intptr_t oprsz = simd_oprsz(desc);
411
intptr_t i;
412
413
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
414
- *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
415
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
416
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
417
}
418
clear_high(d, oprsz, desc);
419
}
420
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
421
intptr_t oprsz = simd_oprsz(desc);
422
intptr_t i;
423
424
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
425
- *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
426
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
427
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
428
}
429
clear_high(d, oprsz, desc);
430
}
431
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
432
intptr_t oprsz = simd_oprsz(desc);
433
intptr_t i;
434
435
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
436
- *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
437
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
438
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
439
}
440
clear_high(d, oprsz, desc);
441
}
442
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
443
intptr_t oprsz = simd_oprsz(desc);
444
intptr_t i;
445
446
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
447
- *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
448
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
449
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
450
}
451
clear_high(d, oprsz, desc);
452
}
453
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
454
intptr_t oprsz = simd_oprsz(desc);
455
intptr_t i;
456
457
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
458
- *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i));
459
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
460
+ *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
461
}
462
clear_high(d, oprsz, desc);
463
}
464
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
465
intptr_t oprsz = simd_oprsz(desc);
466
intptr_t i;
467
468
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
469
- *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i));
470
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
471
+ *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
472
}
473
clear_high(d, oprsz, desc);
474
}
475
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
476
intptr_t oprsz = simd_oprsz(desc);
477
intptr_t i;
478
479
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
480
- *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i));
481
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
482
+ *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
483
}
484
clear_high(d, oprsz, desc);
485
}
486
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
487
void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
488
{
489
intptr_t oprsz = simd_oprsz(desc);
490
- vec64 vecb = (vec64)DUP2(b);
491
+ uint64_t vecb = (uint64_t)DUP2(b);
492
intptr_t i;
493
494
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
495
- *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
496
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
497
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & vecb;
498
}
499
clear_high(d, oprsz, desc);
500
}
501
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
502
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
503
{
504
intptr_t oprsz = simd_oprsz(desc);
505
- vec64 vecb = (vec64)DUP2(b);
506
+ uint64_t vecb = (uint64_t)DUP2(b);
507
intptr_t i;
508
509
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
510
- *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
511
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
512
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ vecb;
513
}
514
clear_high(d, oprsz, desc);
515
}
516
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
517
void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
518
{
519
intptr_t oprsz = simd_oprsz(desc);
520
- vec64 vecb = (vec64)DUP2(b);
521
+ uint64_t vecb = (uint64_t)DUP2(b);
522
intptr_t i;
523
524
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
525
- *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
526
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
527
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | vecb;
528
}
529
clear_high(d, oprsz, desc);
530
}
531
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
532
int shift = simd_data(desc);
533
intptr_t i;
534
535
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
536
- *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
537
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
538
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
539
}
540
clear_high(d, oprsz, desc);
541
}
542
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
543
int shift = simd_data(desc);
544
intptr_t i;
545
546
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
547
- *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
548
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
549
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
550
}
551
clear_high(d, oprsz, desc);
552
}
553
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
554
int shift = simd_data(desc);
555
intptr_t i;
556
557
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
558
- *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
559
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
560
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
561
}
562
clear_high(d, oprsz, desc);
563
}
564
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
565
int shift = simd_data(desc);
566
intptr_t i;
567
568
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
569
- *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
570
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
571
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
572
}
573
clear_high(d, oprsz, desc);
574
}
575
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
576
int shift = simd_data(desc);
577
intptr_t i;
578
579
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
580
- *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
581
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
582
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
583
}
584
clear_high(d, oprsz, desc);
585
}
586
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
587
int shift = simd_data(desc);
588
intptr_t i;
589
590
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
591
- *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
592
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
593
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
594
}
595
clear_high(d, oprsz, desc);
596
}
597
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
598
int shift = simd_data(desc);
599
intptr_t i;
600
601
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
602
- *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
603
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
604
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
605
}
606
clear_high(d, oprsz, desc);
607
}
608
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
609
int shift = simd_data(desc);
610
intptr_t i;
611
612
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
613
- *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
614
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
615
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
616
}
617
clear_high(d, oprsz, desc);
618
}
619
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
620
int shift = simd_data(desc);
621
intptr_t i;
622
623
- for (i = 0; i < oprsz; i += sizeof(vec8)) {
624
- *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
625
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
626
+ *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
627
}
628
clear_high(d, oprsz, desc);
629
}
630
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
631
int shift = simd_data(desc);
632
intptr_t i;
633
634
- for (i = 0; i < oprsz; i += sizeof(vec16)) {
635
- *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
636
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
637
+ *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
638
}
639
clear_high(d, oprsz, desc);
640
}
641
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
642
int shift = simd_data(desc);
643
intptr_t i;
644
645
- for (i = 0; i < oprsz; i += sizeof(vec32)) {
646
- *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
647
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
648
+ *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
649
}
650
clear_high(d, oprsz, desc);
651
}
652
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
653
int shift = simd_data(desc);
654
intptr_t i;
655
656
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
657
- *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
658
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
659
+ *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
660
}
661
clear_high(d, oprsz, desc);
662
}
663
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
664
}
665
666
#define DO_CMP2(SZ) \
667
- DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \
668
- DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \
669
- DO_CMP1(gvec_lt##SZ, svec##SZ, <) \
670
- DO_CMP1(gvec_le##SZ, svec##SZ, <=) \
671
- DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \
672
- DO_CMP1(gvec_leu##SZ, vec##SZ, <=)
673
+ DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \
674
+ DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \
675
+ DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \
676
+ DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \
677
+ DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \
678
+ DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
679
680
DO_CMP2(8)
681
DO_CMP2(16)
682
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
683
intptr_t oprsz = simd_oprsz(desc);
684
intptr_t i;
685
686
- for (i = 0; i < oprsz; i += sizeof(vec64)) {
687
- vec64 aa = *(vec64 *)(a + i);
688
- vec64 bb = *(vec64 *)(b + i);
689
- vec64 cc = *(vec64 *)(c + i);
690
- *(vec64 *)(d + i) = (bb & aa) | (cc & ~aa);
691
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
692
+ uint64_t aa = *(uint64_t *)(a + i);
693
+ uint64_t bb = *(uint64_t *)(b + i);
694
+ uint64_t cc = *(uint64_t *)(c + i);
695
+ *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
696
}
697
clear_high(d, oprsz, desc);
698
}
699
--
2.20.1

--
2.25.1

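A rough standalone restatement of the "fits in a 32-bit dup" test used in
the tcg-op-vec.c hunk above; dup_const32() and fits_mo32_dup() are invented
names for illustration, not the QEMU macros:

#include <stdbool.h>
#include <stdint.h>

/* Replicate a 32-bit value into both halves of a 64-bit element. */
static uint64_t dup_const32(uint32_t x)
{
    return ((uint64_t)x << 32) | x;
}

/* On a 32-bit host, a 64-bit constant can be emitted as a MO_32 dupi
 * exactly when replicating its low half reproduces the full value. */
static bool fits_mo32_dup(uint64_t a)
{
    return a == dup_const32((uint32_t)a);
}
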
Partial cleanup from the CONFIG_VECTOR16 removal.
Replace DO_CMP0 with its scalar expansion, a simple negation.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-runtime-gvec.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

When the two arguments are identical, this can be reduced to
dup_vec or to mov_vec from a tcg_constant_vec.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
13
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
14
clear_high(d, oprsz, desc);
14
}
15
}
15
goto do_default;
16
16
17
-#define DO_CMP0(X) -(X)
17
+ case INDEX_op_dup2_vec:
18
-
18
+ assert(TCG_TARGET_REG_BITS == 32);
19
#define DO_CMP1(NAME, TYPE, OP) \
19
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
20
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
20
+ tmp = arg_info(op->args[1])->val;
21
{ \
21
+ if (tmp == arg_info(op->args[2])->val) {
22
intptr_t oprsz = simd_oprsz(desc); \
22
+ tcg_opt_gen_movi(s, op, op->args[0], tmp);
23
intptr_t i; \
23
+ break;
24
for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
24
+ }
25
- *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
25
+ } else if (args_are_copies(op->args[1], op->args[2])) {
26
+ *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
26
+ op->opc = INDEX_op_dup_vec;
27
} \
27
+ TCGOP_VECE(op) = MO_32;
28
clear_high(d, oprsz, desc); \
28
+ nb_iargs = 1;
29
}
29
+ }
30
@@ -XXX,XX +XXX,XX @@ DO_CMP2(16)
30
+ goto do_default;
31
DO_CMP2(32)
31
+
32
DO_CMP2(64)
32
CASE_OP_32_64(not):
33
33
CASE_OP_32_64(neg):
34
-#undef DO_CMP0
34
CASE_OP_32_64(ext8s):
35
#undef DO_CMP1
36
#undef DO_CMP2
37
38
--
2.20.1

--
2.25.1

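A minimal sketch of the dup2_vec folding rule added to tcg/optimize.c above,
written as standalone C with invented names; it mirrors only the decision,
not the TCG data structures:

#include <stdbool.h>
#include <stdint.h>

enum fold_kind { FOLD_NONE, FOLD_DUPI, FOLD_DUP32 };

/* dup2_vec (32-bit hosts only) builds 64-bit elements from two 32-bit
 * arguments.  Equal constants fold to an immediate dup; two copies of
 * the same value fold to a MO_32 dup_vec of that value. */
static enum fold_kind fold_dup2(bool lo_const, uint32_t lo_val,
                                bool hi_const, uint32_t hi_val,
                                bool args_are_copies, uint32_t *imm)
{
    if (lo_const && hi_const) {
        if (lo_val == hi_val) {
            *imm = lo_val;
            return FOLD_DUPI;
        }
        return FOLD_NONE;   /* distinct constants: leave the op alone */
    }
    if (args_are_copies) {
        return FOLD_DUP32;
    }
    return FOLD_NONE;
}
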
A given RISU testcase for SVE can produce

  tcg-op-vec.c:511: do_shifti: Assertion `i >= 0 && i < (8 << vece)' failed.

because expand_vec_sari gave a shift count of 32 to a MO_32
vector shift.

In 44f1441dbe1, we changed from direct expansion of vector opcodes
to re-use of the tcg expanders. So while the comment correctly notes
that the hw will handle such a shift count, we now have to take our
own sanity checks into account. Which is easy in this particular case.

Fixes: 44f1441dbe1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.inc.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

The cmp_vec opcode is mandatory; this symbol is unused.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.h | 1 -
 tcg/i386/tcg-target.h | 1 -
 tcg/ppc/tcg-target.h | 1 -
 3 files changed, 3 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static void expand_vec_sari(TCGType type, unsigned vece,
15
@@ -XXX,XX +XXX,XX @@ typedef enum {
24
16
#define TCG_TARGET_HAS_shi_vec 1
25
case MO_64:
17
#define TCG_TARGET_HAS_shs_vec 0
26
if (imm <= 32) {
18
#define TCG_TARGET_HAS_shv_vec 1
27
- /* We can emulate a small sign extend by performing an arithmetic
19
-#define TCG_TARGET_HAS_cmp_vec 1
28
+ /*
20
#define TCG_TARGET_HAS_mul_vec 1
29
+ * We can emulate a small sign extend by performing an arithmetic
21
#define TCG_TARGET_HAS_sat_vec 1
30
* 32-bit shift and overwriting the high half of a 64-bit logical
22
#define TCG_TARGET_HAS_minmax_vec 1
31
- * shift (note that the ISA says shift of 32 is valid).
23
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
32
+ * shift. Note that the ISA says shift of 32 is valid, but TCG
24
index XXXXXXX..XXXXXXX 100644
33
+ * does not, so we have to bound the smaller shift -- we get the
25
--- a/tcg/i386/tcg-target.h
34
+ * same result in the high half either way.
26
+++ b/tcg/i386/tcg-target.h
35
*/
27
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
36
t1 = tcg_temp_new_vec(type);
28
#define TCG_TARGET_HAS_shi_vec 1
37
- tcg_gen_sari_vec(MO_32, t1, v1, imm);
29
#define TCG_TARGET_HAS_shs_vec 1
38
+ tcg_gen_sari_vec(MO_32, t1, v1, MIN(imm, 31));
30
#define TCG_TARGET_HAS_shv_vec have_avx2
39
tcg_gen_shri_vec(MO_64, v0, v1, imm);
31
-#define TCG_TARGET_HAS_cmp_vec 1
40
vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32,
32
#define TCG_TARGET_HAS_mul_vec 1
41
tcgv_vec_arg(v0), tcgv_vec_arg(v0),
33
#define TCG_TARGET_HAS_sat_vec 1
34
#define TCG_TARGET_HAS_minmax_vec 1
35
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/tcg/ppc/tcg-target.h
38
+++ b/tcg/ppc/tcg-target.h
39
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
40
#define TCG_TARGET_HAS_shi_vec 0
41
#define TCG_TARGET_HAS_shs_vec 0
42
#define TCG_TARGET_HAS_shv_vec 1
43
-#define TCG_TARGET_HAS_cmp_vec 1
44
#define TCG_TARGET_HAS_mul_vec 1
45
#define TCG_TARGET_HAS_sat_vec 1
46
#define TCG_TARGET_HAS_minmax_vec 1
42
--
2.20.1

--
2.25.1

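A scalar restatement of the trick the i386 expansion above relies on;
sar64_via_hi32() is an invented helper, not TCG code, and it assumes the
usual arithmetic behaviour of >> on signed values:

#include <stdint.h>

/* For 0 < imm <= 32, a 64-bit arithmetic right shift decomposes into a
 * 64-bit logical shift (low half) plus a 32-bit arithmetic shift of the
 * high word (high half).  Clamping the 32-bit shift count to 31 gives
 * the same high half when imm == 32, which is why the bound is safe. */
static uint64_t sar64_via_hi32(uint64_t v, unsigned imm)
{
    uint32_t lo = (uint32_t)(v >> imm);     /* logical 64-bit shift */
    int32_t hi = (int32_t)(v >> 32);        /* sign-carrying high word */

    hi >>= (imm < 31 ? imm : 31);           /* clamp, as in the patch */
    return ((uint64_t)(uint32_t)hi << 32) | lo;
}
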
The comment in tcg-runtime-gvec.c about CONFIG_VECTOR16 says that
tcg-op-gvec.c has eliminated size 8 vectors, and only passes on
multiples of 16. This may have been true of the first few operations,
but is not true of all operations.

In particular, multiply, shift by scalar, and compare of 8- and 16-bit
elements are not expanded inline if host vector operations are not
supported.

For an x86_64 host that does not support AVX, this means that we will
fall back to the helper, which will attempt to use SSE instructions,
which will SEGV on an invalid 8-byte aligned memory operation.

This patch simply removes the CONFIG_VECTOR16 code and configuration
without further simplification.

Buglink: https://bugs.launchpad.net/bugs/1863508
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 configure | 56 ------------------------------------
 accel/tcg/tcg-runtime-gvec.c | 35 +---------------------
 2 files changed, 1 insertion(+), 90 deletions(-)

From: Kele Huang <kele.hwang@gmail.com>

Detect all MIPS store instructions in cpu_signal_handler for all available
MIPS versions, and set is_write if encountering such store instructions.

This fixes the error when dealing with self-modifying code for MIPS.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Kele Huang <kele.hwang@gmail.com>
Signed-off-by: Xu Zou <iwatchnima@gmail.com>
Message-Id: <20201002081420.10814-1-kele.hwang@gmail.com>
[rth: Use uintptr_t for pc to fix n32 build error.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/user-exec.c | 43 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
diff --git a/configure b/configure
index XXXXXXX..XXXXXXX 100755
--- a/configure
+++ b/configure
@@ -XXX,XX +XXX,XX @@ if test "$plugins" = "yes" &&
29
"for this purpose. You can't build with --static."
30
fi
31
32
-########################################
33
-# See if 16-byte vector operations are supported.
34
-# Even without a vector unit the compiler may expand these.
35
-# There is a bug in old GCC for PPC that crashes here.
36
-# Unfortunately it's the system compiler for Centos 7.
37
-
38
-cat > $TMPC << EOF
39
-typedef unsigned char U1 __attribute__((vector_size(16)));
40
-typedef unsigned short U2 __attribute__((vector_size(16)));
41
-typedef unsigned int U4 __attribute__((vector_size(16)));
42
-typedef unsigned long long U8 __attribute__((vector_size(16)));
43
-typedef signed char S1 __attribute__((vector_size(16)));
44
-typedef signed short S2 __attribute__((vector_size(16)));
45
-typedef signed int S4 __attribute__((vector_size(16)));
46
-typedef signed long long S8 __attribute__((vector_size(16)));
47
-static U1 a1, b1;
48
-static U2 a2, b2;
49
-static U4 a4, b4;
50
-static U8 a8, b8;
51
-static S1 c1;
52
-static S2 c2;
53
-static S4 c4;
54
-static S8 c8;
55
-static int i;
56
-void helper(void *d, void *a, int shift, int i);
57
-void helper(void *d, void *a, int shift, int i)
58
-{
59
- *(U1 *)(d + i) = *(U1 *)(a + i) << shift;
60
- *(U2 *)(d + i) = *(U2 *)(a + i) << shift;
61
- *(U4 *)(d + i) = *(U4 *)(a + i) << shift;
62
- *(U8 *)(d + i) = *(U8 *)(a + i) << shift;
63
-}
64
-int main(void)
65
-{
66
- a1 += b1; a2 += b2; a4 += b4; a8 += b8;
67
- a1 -= b1; a2 -= b2; a4 -= b4; a8 -= b8;
68
- a1 *= b1; a2 *= b2; a4 *= b4; a8 *= b8;
69
- a1 &= b1; a2 &= b2; a4 &= b4; a8 &= b8;
70
- a1 |= b1; a2 |= b2; a4 |= b4; a8 |= b8;
71
- a1 ^= b1; a2 ^= b2; a4 ^= b4; a8 ^= b8;
72
- a1 <<= i; a2 <<= i; a4 <<= i; a8 <<= i;
73
- a1 >>= i; a2 >>= i; a4 >>= i; a8 >>= i;
74
- c1 >>= i; c2 >>= i; c4 >>= i; c8 >>= i;
75
- return 0;
76
-}
77
-EOF
78
-
79
-vector16=no
80
-if compile_prog "" "" ; then
81
- vector16=yes
82
-fi
83
-
84
########################################
85
# See if __attribute__((alias)) is supported.
86
# This false for Xcode 9, but has been remedied for Xcode 10.
87
@@ -XXX,XX +XXX,XX @@ if test "$atomic64" = "yes" ; then
88
echo "CONFIG_ATOMIC64=y" >> $config_host_mak
89
fi
90
91
-if test "$vector16" = "yes" ; then
92
- echo "CONFIG_VECTOR16=y" >> $config_host_mak
93
-fi
94
-
95
if test "$attralias" = "yes" ; then
96
echo "CONFIG_ATTRIBUTE_ALIAS=y" >> $config_host_mak
97
fi
98
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
99
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
100
--- a/accel/tcg/tcg-runtime-gvec.c
20
--- a/accel/tcg/user-exec.c
101
+++ b/accel/tcg/tcg-runtime-gvec.c
21
+++ b/accel/tcg/user-exec.c
102
@@ -XXX,XX +XXX,XX @@
22
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
103
#include "tcg/tcg-gvec-desc.h"
23
104
24
#elif defined(__mips__)
105
25
106
-/* Virtually all hosts support 16-byte vectors. Those that don't can emulate
26
+#if defined(__mips16) || defined(__mips_micromips)
107
- * them via GCC's generic vector extension. This turns out to be simpler and
27
+#error "Unsupported encoding"
108
- * more reliable than getting the compiler to autovectorize.
28
+#endif
109
- *
29
+
110
- * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
30
int cpu_signal_handler(int host_signum, void *pinfo,
111
- * are multiples of 16.
31
void *puc)
112
- *
113
- * When the compiler does not support all of the operations we require, the
114
- * loops are written so that we can always fall back on the base types.
115
- */
116
-#ifdef CONFIG_VECTOR16
117
-typedef uint8_t vec8 __attribute__((vector_size(16)));
118
-typedef uint16_t vec16 __attribute__((vector_size(16)));
119
-typedef uint32_t vec32 __attribute__((vector_size(16)));
120
-typedef uint64_t vec64 __attribute__((vector_size(16)));
121
-
122
-typedef int8_t svec8 __attribute__((vector_size(16)));
123
-typedef int16_t svec16 __attribute__((vector_size(16)));
124
-typedef int32_t svec32 __attribute__((vector_size(16)));
125
-typedef int64_t svec64 __attribute__((vector_size(16)));
126
-
127
-#define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
128
-#define DUP8(X) { X, X, X, X, X, X, X, X }
129
-#define DUP4(X) { X, X, X, X }
130
-#define DUP2(X) { X, X }
131
-#else
132
typedef uint8_t vec8;
133
typedef uint16_t vec16;
134
typedef uint32_t vec32;
135
@@ -XXX,XX +XXX,XX @@ typedef int64_t svec64;
136
#define DUP8(X) X
137
#define DUP4(X) X
138
#define DUP2(X) X
139
-#endif /* CONFIG_VECTOR16 */
140
141
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
142
{
32
{
143
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
33
siginfo_t *info = pinfo;
144
clear_high(d, oprsz, desc);
34
ucontext_t *uc = puc;
35
- greg_t pc = uc->uc_mcontext.pc;
36
- int is_write;
37
+ uintptr_t pc = uc->uc_mcontext.pc;
38
+ uint32_t insn = *(uint32_t *)pc;
39
+ int is_write = 0;
40
+
41
+ /* Detect all store instructions at program counter. */
42
+ switch((insn >> 26) & 077) {
43
+ case 050: /* SB */
44
+ case 051: /* SH */
45
+ case 052: /* SWL */
46
+ case 053: /* SW */
47
+ case 054: /* SDL */
48
+ case 055: /* SDR */
49
+ case 056: /* SWR */
50
+ case 070: /* SC */
51
+ case 071: /* SWC1 */
52
+ case 074: /* SCD */
53
+ case 075: /* SDC1 */
54
+ case 077: /* SD */
55
+#if !defined(__mips_isa_rev) || __mips_isa_rev < 6
56
+ case 072: /* SWC2 */
57
+ case 076: /* SDC2 */
58
+#endif
59
+ is_write = 1;
60
+ break;
61
+ case 023: /* COP1X */
62
+ /* Required in all versions of MIPS64 since
63
+ MIPS64r1 and subsequent versions of MIPS32r2. */
64
+ switch (insn & 077) {
65
+ case 010: /* SWXC1 */
66
+ case 011: /* SDXC1 */
67
+ case 015: /* SUXC1 */
68
+ is_write = 1;
69
+ }
70
+ break;
71
+ }
72
73
- /* XXX: compute is_write */
74
- is_write = 0;
75
return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
145
}
76
}
146
77
147
-/* If vectors are enabled, the compiler fills in -1 for true.
148
- Otherwise, we must take care of this by hand. */
149
-#ifdef CONFIG_VECTOR16
150
-# define DO_CMP0(X) X
151
-#else
152
-# define DO_CMP0(X) -(X)
153
-#endif
154
+#define DO_CMP0(X) -(X)
155
156
#define DO_CMP1(NAME, TYPE, OP) \
157
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
158
--
2.20.1

--
2.25.1

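A cut-down illustration of the decode step in the user-exec.c hunk above; it
keeps only a few of the store opcodes the patch handles and uses hex rather
than the octal constants in the source:

#include <stdbool.h>
#include <stdint.h>

/* The MIPS major opcode sits in the top six bits of the instruction
 * word, so classifying a faulting instruction as a store only needs
 * to inspect insn >> 26. */
static bool mips_major_opcode_is_store(uint32_t insn)
{
    switch ((insn >> 26) & 0x3f) {
    case 0x28: /* SB */
    case 0x29: /* SH */
    case 0x2b: /* SW */
    case 0x3f: /* SD */
        return true;
    default:
        return false;
    }
}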