Old cover letter (pull-tcg-20220104):

The following changes since commit 67e41fe0cfb62e6cdfa659f0155417d17e5274ea:

  Merge tag 'pull-ppc-20220104' of https://github.com/legoater/qemu into staging (2022-01-04 07:23:27 -0800)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220104

for you to fetch changes up to d7478d4229f0a2b2817a55487e6b17081099fae4:

  common-user: Fix tail calls to safe_syscall_set_errno_tail (2022-01-04 15:41:03 -0800)

----------------------------------------------------------------
Fix for safe_syscall_base.
Fix for folding of vector add/sub.
Fix build on loongarch64 with gcc 8.
Remove decl for qemu_run_machine_init_done_notifiers.

----------------------------------------------------------------
Philippe Mathieu-Daudé (1):
      linux-user: Fix trivial build error on loongarch64 hosts

Richard Henderson (2):
      tcg/optimize: Fix folding of vector ops
      common-user: Fix tail calls to safe_syscall_set_errno_tail

Xiaoyao Li (1):
      sysemu: Cleanup qemu_run_machine_init_done_notifiers()

 include/sysemu/sysemu.h                    |  1 -
 linux-user/host/loongarch64/host-signal.h  |  4 +--
 tcg/optimize.c                             | 49 +++++++++++++++++++++++-------
 common-user/host/i386/safe-syscall.inc.S   |  1 +
 common-user/host/mips/safe-syscall.inc.S   |  1 +
 common-user/host/x86_64/safe-syscall.inc.S |  1 +
 6 files changed, 42 insertions(+), 15 deletions(-)

New cover letter (pull-tcg-20230502-2):

The following changes since commit c586691e676214eb7edf6a468e84e7ce3b314d43:

  Merge tag 'pull-target-arm-20230502-2' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-05-02 16:38:29 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230502-2

for you to fetch changes up to 129f1f9ee7df77d367d961b3c25353612d33cd83:

  tcg: Introduce tcg_out_movext2 (2023-05-02 13:05:45 -0700)

----------------------------------------------------------------
Misc tcg-related patch queue.

v2: Update bitops.h rotate patch.

----------------------------------------------------------------
Dickon Hood (1):
      qemu/bitops.h: Limit rotate amounts

Kiran Ostrolenk (1):
      qemu/host-utils.h: Add clz and ctz functions for lower-bit integers

Nazar Kazakov (2):
      tcg: Add tcg_gen_gvec_andcs
      tcg: Add tcg_gen_gvec_rotrs

Richard Henderson (7):
      softmmu: Tidy dirtylimit_dirty_ring_full_time
      qemu/int128: Re-shuffle Int128Alias members
      migration/xbzrle: Use __attribute__((target)) for avx512
      accel/tcg: Add cpu_ld*_code_mmu
      tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64
      tcg/mips: Conditionalize tcg_out_exts_i32_i64
      tcg: Introduce tcg_out_movext2

Weiwei Li (1):
      accel/tcg: Uncache the host address for instruction fetch when tlb size < 1

 meson.build                      |  5 +--
 accel/tcg/tcg-runtime.h          |  1 +
 include/exec/cpu_ldst.h          |  9 ++++++
 include/qemu/bitops.h            | 16 +++++-----
 include/qemu/host-utils.h        | 54 +++++++++++++++++++++++++++++++
 include/qemu/int128.h            |  4 +--
 include/tcg/tcg-op-gvec.h        |  4 +++
 accel/tcg/cputlb.c               | 53 ++++++++++++++++++++++++++++++
 accel/tcg/tcg-runtime-gvec.c     | 11 +++++++
 accel/tcg/user-exec.c            | 58 +++++++++++++++++++++++++++++++++
 migration/xbzrle.c               |  9 +++---
 softmmu/dirtylimit.c             | 15 ++++++---
 tcg/tcg-op-gvec.c                | 28 ++++++++++++++++
 tcg/tcg.c                        | 69 +++++++++++++++++++++++++++++++++++++---
 tcg/arm/tcg-target.c.inc         | 44 +++++++++++--------------
 tcg/i386/tcg-target.c.inc        | 19 +++++------
 tcg/loongarch64/tcg-target.c.inc |  4 ++-
 tcg/mips/tcg-target.c.inc        |  4 ++-
 18 files changed, 339 insertions(+), 68 deletions(-)

Old patch (tcg/optimize: Fix folding of vector ops):

Bitwise operations are easy to fold, because the operation is
identical regardless of element size.  But add and sub need
extra element size info that is not currently propagated.

Fixes: 2f9f08ba43d
Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/799
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
     CASE_OP_32_64(mul):
         return x * y;
 
-    CASE_OP_32_64(and):
+    CASE_OP_32_64_VEC(and):
         return x & y;
 
-    CASE_OP_32_64(or):
+    CASE_OP_32_64_VEC(or):
         return x | y;
 
-    CASE_OP_32_64(xor):
+    CASE_OP_32_64_VEC(xor):
         return x ^ y;
 
     case INDEX_op_shl_i32:
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
     case INDEX_op_rotl_i64:
         return rol64(x, y & 63);
 
-    CASE_OP_32_64(not):
+    CASE_OP_32_64_VEC(not):
         return ~x;
 
     CASE_OP_32_64(neg):
         return -x;
 
-    CASE_OP_32_64(andc):
+    CASE_OP_32_64_VEC(andc):
         return x & ~y;
 
-    CASE_OP_32_64(orc):
+    CASE_OP_32_64_VEC(orc):
         return x | ~y;
 
     CASE_OP_32_64(eqv):
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
     return false;
 }
 
+static bool fold_commutative(OptContext *ctx, TCGOp *op)
+{
+    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
+    return false;
+}
+
 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 {
     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
     return false;
 }
 
+/* We cannot as yet do_constant_folding with vectors. */
+static bool fold_add_vec(OptContext *ctx, TCGOp *op)
+{
+    if (fold_commutative(ctx, op) ||
+        fold_xi_to_x(ctx, op, 0)) {
+        return true;
+    }
+    return false;
+}
+
 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 {
     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
@@ -XXX,XX +XXX,XX @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
     return false;
 }
 
-static bool fold_sub(OptContext *ctx, TCGOp *op)
+/* We cannot as yet do_constant_folding with vectors. */
+static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
-        fold_xx_to_i(ctx, op, 0) ||
+    if (fold_xx_to_i(ctx, op, 0) ||
         fold_xi_to_x(ctx, op, 0) ||
         fold_sub_to_neg(ctx, op)) {
         return true;
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
     return false;
 }
 
+static bool fold_sub(OptContext *ctx, TCGOp *op)
+{
+    return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
+}
+
 static bool fold_sub2(OptContext *ctx, TCGOp *op)
 {
     return fold_addsub2(ctx, op, false);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
      * Sorted alphabetically by opcode as much as possible.
      */
     switch (opc) {
-    CASE_OP_32_64_VEC(add):
+    CASE_OP_32_64(add):
         done = fold_add(&ctx, op);
         break;
+    case INDEX_op_add_vec:
+        done = fold_add_vec(&ctx, op);
+        break;
     CASE_OP_32_64(add2):
         done = fold_add2(&ctx, op);
         break;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
     CASE_OP_32_64(sextract):
         done = fold_sextract(&ctx, op);
         break;
-    CASE_OP_32_64_VEC(sub):
+    CASE_OP_32_64(sub):
         done = fold_sub(&ctx, op);
         break;
+    case INDEX_op_sub_vec:
+        done = fold_sub_vec(&ctx, op);
+        break;
     CASE_OP_32_64(sub2):
         done = fold_sub2(&ctx, op);
         break;
-- 
2.25.1
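
To see why add and sub need the element size while the bitwise cases do not,
here is a small standalone C sketch (illustrative only, not QEMU code):
folding a vector add as one wide scalar add lets a carry escape a lane and
corrupt its neighbour, whereas and/or/xor operate bit by bit and never cross
lane boundaries.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Add two vectors of four 16-bit lanes packed into a uint64_t,
 * with each lane wrapping independently. */
static uint64_t add_lanes16(uint64_t x, uint64_t y)
{
    uint64_t r = 0;
    for (int i = 0; i < 64; i += 16) {
        r |= (((x >> i) + (y >> i)) & 0xffffull) << i;
    }
    return r;
}

int main(void)
{
    uint64_t x = 0x0000ffff00000001ull;
    uint64_t y = 0x0000000100000001ull;
    /* The scalar add lets the carry out of the 0xffff lane spill into
     * the lane above it; the lane-wise add must not. */
    printf("scalar add: %016" PRIx64 "\n", x + y);              /* 0001000000000002 */
    printf("lane add  : %016" PRIx64 "\n", add_lanes16(x, y));  /* 0000000000000002 */
    return 0;
}

This is why the patch routes INDEX_op_add_vec and INDEX_op_sub_vec to
fold_add_vec() and fold_sub_vec(), which keep the algebraic simplifications
but skip do_constant_folding().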

New patch (qemu/bitops.h: Limit rotate amounts):

From: Dickon Hood <dickon.hood@codethink.co.uk>

Rotates have been fixed up to only allow for reasonable rotate amounts
(i.e., no rotates >7 on an 8-bit value, etc.)  This fixes a problem with
riscv vector rotate instructions.

Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230428144757.57530-9-lawrence.hunter@codethink.co.uk>
[rth: Mask shifts in both directions.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/bitops.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -XXX,XX +XXX,XX @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr,
  */
 static inline uint8_t rol8(uint8_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((8 - shift) & 7));
+    return (word << (shift & 7)) | (word >> (-shift & 7));
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static inline uint8_t rol8(uint8_t word, unsigned int shift)
  */
 static inline uint8_t ror8(uint8_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((8 - shift) & 7));
+    return (word >> (shift & 7)) | (word << (-shift & 7));
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static inline uint8_t ror8(uint8_t word, unsigned int shift)
  */
 static inline uint16_t rol16(uint16_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((16 - shift) & 15));
+    return (word << (shift & 15)) | (word >> (-shift & 15));
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static inline uint16_t rol16(uint16_t word, unsigned int shift)
  */
 static inline uint16_t ror16(uint16_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((16 - shift) & 15));
+    return (word >> (shift & 15)) | (word << (-shift & 15));
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static inline uint16_t ror16(uint16_t word, unsigned int shift)
  */
 static inline uint32_t rol32(uint32_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((32 - shift) & 31));
+    return (word << (shift & 31)) | (word >> (-shift & 31));
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static inline uint32_t rol32(uint32_t word, unsigned int shift)
  */
 static inline uint32_t ror32(uint32_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((32 - shift) & 31));
+    return (word >> (shift & 31)) | (word << (-shift & 31));
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static inline uint32_t ror32(uint32_t word, unsigned int shift)
  */
 static inline uint64_t rol64(uint64_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((64 - shift) & 63));
+    return (word << (shift & 63)) | (word >> (-shift & 63));
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static inline uint64_t rol64(uint64_t word, unsigned int shift)
  */
 static inline uint64_t ror64(uint64_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((64 - shift) & 63));
+    return (word >> (shift & 63)) | (word << (-shift & 63));
 }
 
 /**
-- 
2.34.1
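
The masking is not only about canonicalizing large rotate amounts: in C,
shifting an N-bit value by N or more is undefined behaviour, and guests
(e.g. the riscv vector rotates mentioned above) can legitimately request
such amounts.  Masking both shifts keeps each one in [0, N-1] and makes
rotate-by-zero safe as well.  A minimal standalone sketch of the new
semantics (illustrative only, not QEMU code):

#include <stdint.h>
#include <stdio.h>

static uint32_t rol32_masked(uint32_t word, unsigned int shift)
{
    /* (shift & 31) and (-shift & 31) are both always in [0, 31],
     * so neither shift can be undefined behaviour. */
    return (word << (shift & 31)) | (word >> (-shift & 31));
}

int main(void)
{
    printf("%08x\n", rol32_masked(0x80000001u, 1));   /* 00000003 */
    printf("%08x\n", rol32_masked(0x80000001u, 0));   /* 80000001 */
    printf("%08x\n", rol32_masked(0x80000001u, 32));  /* 80000001: same as 0 */
    printf("%08x\n", rol32_masked(0x80000001u, 33));  /* 00000003: same as 1 */
    return 0;
}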
Deleted patch

From: Philippe Mathieu-Daudé <f4bug@amsat.org>

When building using GCC 8.3.0 on loongarch64 (Loongnix) we get:

  In file included from ../linux-user/signal.c:33:
  ../linux-user/host/loongarch64/host-signal.h: In function ‘host_signal_write’:
  ../linux-user/host/loongarch64/host-signal.h:57:9: error: a label can only be part of a statement and a declaration is not a statement
           uint32_t sel = (insn >> 15) & 0b11111111111;
           ^~~~~~~~

We don't use the 'sel' variable more than once, so drop it.

Meson output for the record:

  Host machine cpu family: loongarch64
  Host machine cpu: loongarch64
  C compiler for the host machine: cc (gcc 8.3.0 "cc (Loongnix 8.3.0-6.lnd.vec.27) 8.3.0")
  C linker for the host machine: cc ld.bfd 2.31.1-system

Fixes: ad812c3bd65 ("linux-user: Implement CPU-specific signal handler for loongarch64 hosts")
Reported-by: Song Gao <gaosong@loongson.cn>
Suggested-by: Song Gao <gaosong@loongson.cn>
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220104215027.2180972-1-f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/host/loongarch64/host-signal.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/linux-user/host/loongarch64/host-signal.h b/linux-user/host/loongarch64/host-signal.h
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/host/loongarch64/host-signal.h
+++ b/linux-user/host/loongarch64/host-signal.h
@@ -XXX,XX +XXX,XX @@ static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
         }
         break;
     case 0b001110: /* indexed, atomic, bounds-checking memory operations */
-        uint32_t sel = (insn >> 15) & 0b11111111111;
-
-        switch (sel) {
+        switch ((insn >> 15) & 0b11111111111) {
         case 0b00000100000: /* stx.b */
         case 0b00000101000: /* stx.h */
         case 0b00000110000: /* stx.w */
-- 
2.25.1
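
The error quoted above is the pre-C23 rule that a label (including a case
label) must be attached to a statement, and a declaration is not a
statement.  A minimal illustration of the failing pattern and the usual
workarounds (not QEMU code; names are made up):

#include <stdint.h>

uint32_t demo(int kind, uint32_t insn)
{
    switch (kind) {
    /* case 0: uint32_t sel = ...;  -- rejected by GCC 8: a declaration
     * directly after a label. */
    case 0: {                        /* fix 1: braces form a compound statement */
        uint32_t sel = (insn >> 15) & 0x7ff;
        return sel;
    }
    case 1:;                         /* fix 2: empty statement carries the label */
        uint32_t sel2 = (insn >> 15) & 0x7ff;
        return sel2;
    default:                         /* fix 3, used by the patch: drop the
                                      * single-use variable altogether */
        return (insn >> 15) & 0x7ff;
    }
}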
Deleted patch

From: Xiaoyao Li <xiaoyao.li@intel.com>

Remove qemu_run_machine_init_done_notifiers(), since it has no
implementation and no user.

Fixes: f66dc8737c9 ("vl: move all generic initialization out of vl.c")
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20220104024136.1433545-1-xiaoyao.li@intel.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/sysemu/sysemu.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -XXX,XX +XXX,XX @@ extern bool qemu_uuid_set;
 void qemu_add_exit_notifier(Notifier *notify);
 void qemu_remove_exit_notifier(Notifier *notify);
 
-void qemu_run_machine_init_done_notifiers(void);
 void qemu_add_machine_init_done_notifier(Notifier *notify);
 void qemu_remove_machine_init_done_notifier(Notifier *notify);
 
-- 
2.25.1

Deleted patch

For the ABIs in which the syscall return register is not
also the first function argument register, move the errno
value into the correct place.

Fixes: a3310c0397e2 ("linux-user: Move syscall error detection into safe_syscall_base")
Reported-by: Laurent Vivier <laurent@vivier.eu>
Tested-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220104190454.542225-1-richard.henderson@linaro.org>
---
 common-user/host/i386/safe-syscall.inc.S   | 1 +
 common-user/host/mips/safe-syscall.inc.S   | 1 +
 common-user/host/x86_64/safe-syscall.inc.S | 1 +
 3 files changed, 3 insertions(+)

diff --git a/common-user/host/i386/safe-syscall.inc.S b/common-user/host/i386/safe-syscall.inc.S
index XXXXXXX..XXXXXXX 100644
--- a/common-user/host/i386/safe-syscall.inc.S
+++ b/common-user/host/i386/safe-syscall.inc.S
@@ -XXX,XX +XXX,XX @@ safe_syscall_end:
         pop     %ebp
         .cfi_adjust_cfa_offset -4
         .cfi_restore ebp
+        mov     %eax, (%esp)
         jmp     safe_syscall_set_errno_tail
 
         .cfi_endproc
diff --git a/common-user/host/mips/safe-syscall.inc.S b/common-user/host/mips/safe-syscall.inc.S
index XXXXXXX..XXXXXXX 100644
--- a/common-user/host/mips/safe-syscall.inc.S
+++ b/common-user/host/mips/safe-syscall.inc.S
@@ -XXX,XX +XXX,XX @@ safe_syscall_end:
 1:      USE_ALT_CP(t0)
         SETUP_GPX(t1)
         SETUP_GPX64(t0, t1)
+        move    a0, v0
         PTR_LA  t9, safe_syscall_set_errno_tail
         jr      t9
 
diff --git a/common-user/host/x86_64/safe-syscall.inc.S b/common-user/host/x86_64/safe-syscall.inc.S
index XXXXXXX..XXXXXXX 100644
--- a/common-user/host/x86_64/safe-syscall.inc.S
+++ b/common-user/host/x86_64/safe-syscall.inc.S
@@ -XXX,XX +XXX,XX @@ safe_syscall_end:
 1:      pop     %rbp
         .cfi_def_cfa_offset 8
         .cfi_restore rbp
+        mov     %eax, %edi
         jmp     safe_syscall_set_errno_tail
         .cfi_endproc
-- 
2.25.1
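
For context: on these hosts the raw syscall result is left in the kernel
return register (%eax on i386/x86_64, v0 on mips), while the C calling
convention expects the first argument somewhere else (the stack slot on
i386, a0 on mips, %edi on x86_64), hence the added one-instruction moves
before the tail call.  A rough C analogue of the fixed error path, written
as a sketch under the assumption that the helper takes the errno value and
returns the -1 result (the real declaration lives in QEMU's common-user
code):

/* Assumed signature for illustration only. */
long safe_syscall_set_errno_tail(int value);

static long safe_syscall_error_path(int host_errno)
{
    /* The assembly must move host_errno from the syscall return
     * register into the first-argument register (or stack slot)
     * before jumping -- that move is exactly what this patch adds. */
    return safe_syscall_set_errno_tail(host_errno);
}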