The following changes since commit 3e08b2b9cb64bff2b73fa9128c0e49bfcde0dd40:

  Merge remote-tracking branch 'remotes/philmd-gitlab/tags/edk2-next-20200121' into staging (2020-01-21 15:29:25 +0000)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20200121

for you to fetch changes up to 75fa376cdab5e5db2c7fdd107358e16f95503ac6:

  scripts/git.orderfile: Display decodetree before C source (2020-01-21 15:26:09 -1000)

----------------------------------------------------------------
Remove another limit to NB_MMU_MODES.
Fix compilation using uclibc.
Fix defaulting of -accel parameters.
Tidy cputlb basic routines.
Adjust git.orderfile for decodetree.

----------------------------------------------------------------
Carlos Santos (1):
      util/cacheinfo: fix crash when compiling with uClibc

Philippe Mathieu-Daudé (1):
      scripts/git.orderfile: Display decodetree before C source

Richard Henderson (14):
      cputlb: Handle NB_MMU_MODES > TARGET_PAGE_BITS_MIN
      vl: Remove unused variable in configure_accelerators
      vl: Reduce scope of variables in configure_accelerators
      vl: Remove useless test in configure_accelerators
      vl: Only choose enabled accelerators in configure_accelerators
      cputlb: Merge tlb_table_flush_by_mmuidx into tlb_flush_one_mmuidx_locked
      cputlb: Make tlb_n_entries private to cputlb.c
      cputlb: Pass CPUTLBDescFast to tlb_n_entries and sizeof_tlb
      cputlb: Hoist tlb portions in tlb_mmu_resize_locked
      cputlb: Hoist tlb portions in tlb_flush_one_mmuidx_locked
      cputlb: Split out tlb_mmu_flush_locked
      cputlb: Partially merge tlb_dyn_init into tlb_init
      cputlb: Initialize tlbs as flushed
      cputlb: Hoist timestamp outside of loops over tlbs

 include/exec/cpu_ldst.h |   5 -
 accel/tcg/cputlb.c      | 287 +++++++++++++++++++++++++++++++++---------
 util/cacheinfo.c        |  10 +-
 vl.c                    |  27 +++--
 scripts/git.orderfile   |   3 +
 5 files changed, 223 insertions(+), 109 deletions(-)


The following changes since commit 67e41fe0cfb62e6cdfa659f0155417d17e5274ea:

  Merge tag 'pull-ppc-20220104' of https://github.com/legoater/qemu into staging (2022-01-04 07:23:27 -0800)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220104

for you to fetch changes up to d7478d4229f0a2b2817a55487e6b17081099fae4:

  common-user: Fix tail calls to safe_syscall_set_errno_tail (2022-01-04 15:41:03 -0800)

----------------------------------------------------------------
Fix for safe_syscall_base.
Fix for folding of vector add/sub.
Fix build on loongarch64 with gcc 8.
Remove decl for qemu_run_machine_init_done_notifiers.

----------------------------------------------------------------
Philippe Mathieu-Daudé (1):
      linux-user: Fix trivial build error on loongarch64 hosts

Richard Henderson (2):
      tcg/optimize: Fix folding of vector ops
      common-user: Fix tail calls to safe_syscall_set_errno_tail

Xiaoyao Li (1):
      sysemu: Cleanup qemu_run_machine_init_done_notifiers()

 include/sysemu/sysemu.h                    |  1 -
 linux-user/host/loongarch64/host-signal.h  |  4 +--
 tcg/optimize.c                             | 49 +++++++++++++++++++++++-------
 common-user/host/i386/safe-syscall.inc.S   |  1 +
 common-user/host/mips/safe-syscall.inc.S   |  1 +
 common-user/host/x86_64/safe-syscall.inc.S |  1 +
 6 files changed, 42 insertions(+), 15 deletions(-)
Deleted patch
In target/arm we will shortly have "too many" mmu_idx.
The current minimum barrier is caused by the way in which
tlb_flush_page_by_mmuidx is coded.

We can remove this limitation by allocating memory for
consumption by the worker.  Let us assume that this is
the unlikely case, as will be the case for the majority
of targets which have so far satisfied the BUILD_BUG_ON,
and only allocate memory when necessary.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 167 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 132 insertions(+), 35 deletions(-)
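As a standalone sketch of the encoding trick described above (the constants
and helper names are illustrative, not the QEMU definitions): a page-aligned
address leaves TARGET_PAGE_BITS low bits free, so a small idxmap can ride in
them, and anything larger has to go through an allocated structure instead.

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t target_ulong;               /* assumed for the sketch */
    #define TARGET_PAGE_BITS 12                   /* assumed page size: 4k */
    #define TARGET_PAGE_SIZE ((target_ulong)1 << TARGET_PAGE_BITS)
    #define TARGET_PAGE_MASK (~(TARGET_PAGE_SIZE - 1))

    /* Pack a page address and a small mmu_idx bitmap into one word. */
    static target_ulong encode_addr_idxmap(target_ulong addr, uint16_t idxmap)
    {
        assert((addr & ~TARGET_PAGE_MASK) == 0);   /* page aligned */
        assert(idxmap < TARGET_PAGE_SIZE);         /* fits in the low bits */
        return addr | idxmap;
    }

    /* Recover both halves on the worker side. */
    static void decode_addr_idxmap(target_ulong data,
                                   target_ulong *addr, uint16_t *idxmap)
    {
        *addr = data & TARGET_PAGE_MASK;
        *idxmap = data & ~TARGET_PAGE_MASK;
    }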
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/accel/tcg/cputlb.c
20
+++ b/accel/tcg/cputlb.c
21
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
22
}
23
}
24
25
-/* As we are going to hijack the bottom bits of the page address for a
26
- * mmuidx bit mask we need to fail to build if we can't do that
27
+/**
28
+ * tlb_flush_page_by_mmuidx_async_0:
29
+ * @cpu: cpu on which to flush
30
+ * @addr: page of virtual address to flush
31
+ * @idxmap: set of mmu_idx to flush
32
+ *
33
+ * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
34
+ * at @addr from the tlbs indicated by @idxmap from @cpu.
35
*/
36
-QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
37
-
38
-static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
39
- run_on_cpu_data data)
40
+static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
41
+ target_ulong addr,
42
+ uint16_t idxmap)
43
{
44
CPUArchState *env = cpu->env_ptr;
45
- target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
46
- target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
47
- unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
48
int mmu_idx;
49
50
assert_cpu_is_self(cpu);
51
52
- tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
53
- addr, mmu_idx_bitmap);
54
+ tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
55
56
qemu_spin_lock(&env_tlb(env)->c.lock);
57
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
58
- if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
59
+ if ((idxmap >> mmu_idx) & 1) {
60
tlb_flush_page_locked(env, mmu_idx, addr);
61
}
62
}
63
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
64
tb_flush_jmp_cache(cpu, addr);
65
}
66
67
+/**
68
+ * tlb_flush_page_by_mmuidx_async_1:
69
+ * @cpu: cpu on which to flush
70
+ * @data: encoded addr + idxmap
71
+ *
72
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
73
+ * async_run_on_cpu. The idxmap parameter is encoded in the page
74
+ * offset of the target_ptr field. This limits the set of mmu_idx
75
+ * that can be passed via this method.
76
+ */
77
+static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
78
+ run_on_cpu_data data)
79
+{
80
+ target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
81
+ target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
82
+ uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
83
+
84
+ tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
85
+}
86
+
87
+typedef struct {
88
+ target_ulong addr;
89
+ uint16_t idxmap;
90
+} TLBFlushPageByMMUIdxData;
91
+
92
+/**
93
+ * tlb_flush_page_by_mmuidx_async_2:
94
+ * @cpu: cpu on which to flush
95
+ * @data: allocated addr + idxmap
96
+ *
97
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
98
+ * async_run_on_cpu. The addr+idxmap parameters are stored in a
99
+ * TLBFlushPageByMMUIdxData structure that has been allocated
100
+ * specifically for this helper. Free the structure when done.
101
+ */
102
+static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
103
+ run_on_cpu_data data)
104
+{
105
+ TLBFlushPageByMMUIdxData *d = data.host_ptr;
106
+
107
+ tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
108
+ g_free(d);
109
+}
110
+
111
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
112
{
113
- target_ulong addr_and_mmu_idx;
114
-
115
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
116
117
/* This should already be page aligned */
118
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
119
- addr_and_mmu_idx |= idxmap;
120
+ addr &= TARGET_PAGE_MASK;
121
122
- if (!qemu_cpu_is_self(cpu)) {
123
- async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
124
- RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
125
+ if (qemu_cpu_is_self(cpu)) {
126
+ tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
127
+ } else if (idxmap < TARGET_PAGE_SIZE) {
128
+ /*
129
+ * Most targets have only a few mmu_idx. In the case where
130
+ * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
131
+ * allocating memory for this operation.
132
+ */
133
+ async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
134
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
135
} else {
136
- tlb_flush_page_by_mmuidx_async_work(
137
- cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
138
+ TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
139
+
140
+ /* Otherwise allocate a structure, freed by the worker. */
141
+ d->addr = addr;
142
+ d->idxmap = idxmap;
143
+ async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
144
+ RUN_ON_CPU_HOST_PTR(d));
145
}
146
}
147
148
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
149
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
150
uint16_t idxmap)
151
{
152
- const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
153
- target_ulong addr_and_mmu_idx;
154
-
155
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
156
157
/* This should already be page aligned */
158
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
159
- addr_and_mmu_idx |= idxmap;
160
+ addr &= TARGET_PAGE_MASK;
161
162
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
163
- fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
164
+ /*
165
+ * Allocate memory to hold addr+idxmap only when needed.
166
+ * See tlb_flush_page_by_mmuidx for details.
167
+ */
168
+ if (idxmap < TARGET_PAGE_SIZE) {
169
+ flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
170
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
171
+ } else {
172
+ CPUState *dst_cpu;
173
+
174
+ /* Allocate a separate data block for each destination cpu. */
175
+ CPU_FOREACH(dst_cpu) {
176
+ if (dst_cpu != src_cpu) {
177
+ TLBFlushPageByMMUIdxData *d
178
+ = g_new(TLBFlushPageByMMUIdxData, 1);
179
+
180
+ d->addr = addr;
181
+ d->idxmap = idxmap;
182
+ async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
183
+ RUN_ON_CPU_HOST_PTR(d));
184
+ }
185
+ }
186
+ }
187
+
188
+ tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
189
}
190
191
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
192
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
193
target_ulong addr,
194
uint16_t idxmap)
195
{
196
- const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
197
- target_ulong addr_and_mmu_idx;
198
-
199
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
200
201
/* This should already be page aligned */
202
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
203
- addr_and_mmu_idx |= idxmap;
204
+ addr &= TARGET_PAGE_MASK;
205
206
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
207
- async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
208
+ /*
209
+ * Allocate memory to hold addr+idxmap only when needed.
210
+ * See tlb_flush_page_by_mmuidx for details.
211
+ */
212
+ if (idxmap < TARGET_PAGE_SIZE) {
213
+ flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
214
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
215
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
216
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
217
+ } else {
218
+ CPUState *dst_cpu;
219
+ TLBFlushPageByMMUIdxData *d;
220
+
221
+ /* Allocate a separate data block for each destination cpu. */
222
+ CPU_FOREACH(dst_cpu) {
223
+ if (dst_cpu != src_cpu) {
224
+ d = g_new(TLBFlushPageByMMUIdxData, 1);
225
+ d->addr = addr;
226
+ d->idxmap = idxmap;
227
+ async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
228
+ RUN_ON_CPU_HOST_PTR(d));
229
+ }
230
+ }
231
+
232
+ d = g_new(TLBFlushPageByMMUIdxData, 1);
233
+ d->addr = addr;
234
+ d->idxmap = idxmap;
235
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
236
+ RUN_ON_CPU_HOST_PTR(d));
237
+ }
238
}
239
240
void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
241
--
242
2.20.1
243
244
Deleted patch
From: Carlos Santos <casantos@redhat.com>

uClibc defines _SC_LEVEL1_ICACHE_LINESIZE and _SC_LEVEL1_DCACHE_LINESIZE
but the corresponding sysconf calls return -1, which is a valid result,
meaning that the limit is indeterminate.

Handle this situation using the fallback values instead of crashing due
to an assertion failure.

Signed-off-by: Carlos Santos <casantos@redhat.com>
Message-Id: <20191017123713.30192-1-casantos@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/cacheinfo.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)
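A minimal standalone program showing the same defensive pattern (the
fallback value of 64 is only an assumption for the example, not taken
from this patch):

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int dsize = 64;   /* fallback when the limit cannot be determined */
    #ifdef _SC_LEVEL1_DCACHE_LINESIZE
        long tmp = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
        if (tmp > 0) {    /* -1 means "indeterminate": keep the fallback */
            dsize = (int)tmp;
        }
    #endif
        printf("L1 dcache line size: %d bytes\n", dsize);
        return 0;
    }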
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/util/cacheinfo.c
20
+++ b/util/cacheinfo.c
21
@@ -XXX,XX +XXX,XX @@ static void sys_cache_info(int *isize, int *dsize)
22
static void sys_cache_info(int *isize, int *dsize)
23
{
24
# ifdef _SC_LEVEL1_ICACHE_LINESIZE
25
- *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
26
+ int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
27
+ if (tmp_isize > 0) {
28
+ *isize = tmp_isize;
29
+ }
30
# endif
31
# ifdef _SC_LEVEL1_DCACHE_LINESIZE
32
- *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
33
+ int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
34
+ if (tmp_dsize > 0) {
35
+ *dsize = tmp_dsize;
36
+ }
37
# endif
38
}
39
#endif /* sys_cache_info */
40
--
41
2.20.1
42
43
Deleted patch
The accel_initialised variable no longer has any setters.

Fixes: 6f6e1698a68c
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/vl.c b/vl.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/vl.c
16
+++ b/vl.c
17
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
18
{
19
const char *accel;
20
char **accel_list, **tmp;
21
- bool accel_initialised = false;
22
bool init_failed = false;
23
24
qemu_opts_foreach(qemu_find_opts("icount"),
25
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
26
27
accel_list = g_strsplit(accel, ":", 0);
28
29
- for (tmp = accel_list; !accel_initialised && tmp && *tmp; tmp++) {
30
+ for (tmp = accel_list; tmp && *tmp; tmp++) {
31
/*
32
* Filter invalid accelerators here, to prevent obscenities
33
* such as "-machine accel=tcg,,thread=single".
34
--
35
2.20.1
36
37
Deleted patch
The accel_list and tmp variables are only used when manufacturing
-machine accel, options based on -accel.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/vl.c b/vl.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/vl.c
15
+++ b/vl.c
16
@@ -XXX,XX +XXX,XX @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
17
static void configure_accelerators(const char *progname)
18
{
19
const char *accel;
20
- char **accel_list, **tmp;
21
bool init_failed = false;
22
23
qemu_opts_foreach(qemu_find_opts("icount"),
24
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
25
26
accel = qemu_opt_get(qemu_get_machine_opts(), "accel");
27
if (QTAILQ_EMPTY(&qemu_accel_opts.head)) {
28
+ char **accel_list, **tmp;
29
+
30
if (accel == NULL) {
31
/* Select the default accelerator */
32
if (!accel_find("tcg") && !accel_find("kvm")) {
33
--
34
2.20.1
35
36
Deleted patch
The result of g_strsplit is never NULL.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
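For reference, a small self-contained GLib example of the property relied
on here: g_strsplit() always returns a NULL-terminated vector, so only the
element check is needed when walking it.

    #include <glib.h>
    #include <stdio.h>

    int main(void)
    {
        char **accel_list = g_strsplit("tcg:kvm", ":", 0);   /* never NULL */

        for (char **tmp = accel_list; *tmp; tmp++) {
            printf("accel: %s\n", *tmp);
        }
        g_strfreev(accel_list);
        return 0;
    }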
diff --git a/vl.c b/vl.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/vl.c
15
+++ b/vl.c
16
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
17
18
accel_list = g_strsplit(accel, ":", 0);
19
20
- for (tmp = accel_list; tmp && *tmp; tmp++) {
21
+ for (tmp = accel_list; *tmp; tmp++) {
22
/*
23
* Filter invalid accelerators here, to prevent obscenities
24
* such as "-machine accel=tcg,,thread=single".
25
--
26
2.20.1
27
28
Deleted patch
By choosing "tcg:kvm" when kvm is not enabled, we generate
an incorrect warning: "invalid accelerator kvm".

At the same time, use g_str_has_suffix rather than open-coding
the same operation.

Presumably the inverse is also true with --disable-tcg.

Fixes: 28a0961757fc
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)
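The selection logic the patch describes, pulled out into a standalone
helper for illustration (the function name and the gboolean flags are
stand-ins, not vl.c code):

    #include <glib.h>

    static const char *pick_default_accel(const char *progname,
                                          gboolean have_tcg,
                                          gboolean have_kvm)
    {
        if (have_tcg && have_kvm) {
            /* If the program name ends with "kvm", we prefer KVM. */
            return g_str_has_suffix(progname, "kvm") ? "kvm:tcg" : "tcg:kvm";
        } else if (have_kvm) {
            return "kvm";
        } else if (have_tcg) {
            return "tcg";
        }
        return NULL;   /* caller reports that no accelerator is available */
    }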
diff --git a/vl.c b/vl.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/vl.c
21
+++ b/vl.c
22
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
23
24
if (accel == NULL) {
25
/* Select the default accelerator */
26
- if (!accel_find("tcg") && !accel_find("kvm")) {
27
- error_report("No accelerator selected and"
28
- " no default accelerator available");
29
- exit(1);
30
- } else {
31
- int pnlen = strlen(progname);
32
- if (pnlen >= 3 && g_str_equal(&progname[pnlen - 3], "kvm")) {
33
+ bool have_tcg = accel_find("tcg");
34
+ bool have_kvm = accel_find("kvm");
35
+
36
+ if (have_tcg && have_kvm) {
37
+ if (g_str_has_suffix(progname, "kvm")) {
38
/* If the program name ends with "kvm", we prefer KVM */
39
accel = "kvm:tcg";
40
} else {
41
accel = "tcg:kvm";
42
}
43
+ } else if (have_kvm) {
44
+ accel = "kvm";
45
+ } else if (have_tcg) {
46
+ accel = "tcg";
47
+ } else {
48
+ error_report("No accelerator selected and"
49
+ " no default accelerator available");
50
+ exit(1);
51
}
52
}
53
-
54
accel_list = g_strsplit(accel, ":", 0);
55
56
for (tmp = accel_list; *tmp; tmp++) {
57
--
58
2.20.1
59
60
Deleted patch
There is only one caller for tlb_table_flush_by_mmuidx.  Place
the result at the earlier line number, due to an expected user
in the near future.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
15
+++ b/accel/tcg/cputlb.c
16
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
17
}
18
}
19
20
-static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
21
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
22
{
23
tlb_mmu_resize_locked(env, mmu_idx);
24
- memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
25
env_tlb(env)->d[mmu_idx].n_used_entries = 0;
26
+ env_tlb(env)->d[mmu_idx].large_page_addr = -1;
27
+ env_tlb(env)->d[mmu_idx].large_page_mask = -1;
28
+ env_tlb(env)->d[mmu_idx].vindex = 0;
29
+ memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
30
+ memset(env_tlb(env)->d[mmu_idx].vtable, -1,
31
+ sizeof(env_tlb(env)->d[0].vtable));
32
}
33
34
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
35
@@ -XXX,XX +XXX,XX @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
36
*pelide = elide;
37
}
38
39
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
40
-{
41
- tlb_table_flush_by_mmuidx(env, mmu_idx);
42
- env_tlb(env)->d[mmu_idx].large_page_addr = -1;
43
- env_tlb(env)->d[mmu_idx].large_page_mask = -1;
44
- env_tlb(env)->d[mmu_idx].vindex = 0;
45
- memset(env_tlb(env)->d[mmu_idx].vtable, -1,
46
- sizeof(env_tlb(env)->d[0].vtable));
47
-}
48
-
49
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
50
{
51
CPUArchState *env = cpu->env_ptr;
52
--
53
2.20.1
54
55
Deleted patch
There are no users of this function outside cputlb.c,
and its interface will change in the next patch.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu_ldst.h | 5 -----
 accel/tcg/cputlb.c      | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/exec/cpu_ldst.h
16
+++ b/include/exec/cpu_ldst.h
17
@@ -XXX,XX +XXX,XX @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
18
return (addr >> TARGET_PAGE_BITS) & size_mask;
19
}
20
21
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
22
-{
23
- return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
24
-}
25
-
26
/* Find the TLB entry corresponding to the mmu_idx + address pair. */
27
static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
28
target_ulong addr)
29
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/accel/tcg/cputlb.c
32
+++ b/accel/tcg/cputlb.c
33
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
34
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
35
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
36
37
+static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
38
+{
39
+ return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
40
+}
41
+
42
static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
43
{
44
return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
45
--
46
2.20.1
47
48
Deleted patch
We do not need the entire CPUArchState to compute these values.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)
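The arithmetic behind tlb_n_entries() and sizeof_tlb(), checked in
isolation (CPU_TLB_ENTRY_BITS is assumed to be 5 here purely for the
example; the real value is configuration dependent).  The fast mask
stores (n_entries - 1) << CPU_TLB_ENTRY_BITS, so both the entry count
and the table size in bytes fall out of it without touching CPUArchState:

    #include <assert.h>
    #include <stddef.h>

    #define CPU_TLB_ENTRY_BITS 5           /* assumed: 32-byte TLB entries */

    int main(void)
    {
        size_t n_entries = 256;                               /* example */
        size_t mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;  /* as stored */

        /* tlb_n_entries(): recover the entry count from the mask. */
        assert((mask >> CPU_TLB_ENTRY_BITS) + 1 == n_entries);

        /* sizeof_tlb(): recover the table size in bytes from the mask. */
        assert(mask + (1 << CPU_TLB_ENTRY_BITS)
               == n_entries << CPU_TLB_ENTRY_BITS);
        return 0;
    }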
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
14
+++ b/accel/tcg/cputlb.c
15
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
16
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
17
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
18
19
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
20
+static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
21
{
22
- return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
23
+ return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
24
}
25
26
-static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
27
+static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
28
{
29
- return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
30
+ return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
31
}
32
33
static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
34
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
35
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
36
{
37
CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
38
- size_t old_size = tlb_n_entries(env, mmu_idx);
39
+ size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
40
size_t rate;
41
size_t new_size = old_size;
42
int64_t now = get_clock_realtime();
43
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
44
env_tlb(env)->d[mmu_idx].large_page_addr = -1;
45
env_tlb(env)->d[mmu_idx].large_page_mask = -1;
46
env_tlb(env)->d[mmu_idx].vindex = 0;
47
- memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
48
+ memset(env_tlb(env)->f[mmu_idx].table, -1,
49
+ sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
50
memset(env_tlb(env)->d[mmu_idx].vtable, -1,
51
sizeof(env_tlb(env)->d[0].vtable));
52
}
53
@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
54
qemu_spin_lock(&env_tlb(env)->c.lock);
55
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
56
unsigned int i;
57
- unsigned int n = tlb_n_entries(env, mmu_idx);
58
+ unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
59
60
for (i = 0; i < n; i++) {
61
tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
62
--
63
2.20.1
64
65
Deleted patch
No functional change, but the smaller expressions make
the code easier to read.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)
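The shape of the refactoring, reduced to a generic fragment (the structure
and field names below are stand-ins, not the QEMU definitions): bind the
long member expression to a local pointer once, then use the short name on
every following line.

    #include <stddef.h>

    typedef struct { unsigned long mask; void *table; } Fast;
    typedef struct { Fast f[8]; } TLB;

    static void reset_one_locked(TLB *tlb, int mmu_idx)
    {
        Fast *fast = &tlb->f[mmu_idx];   /* hoisted once */

        fast->mask = 0;                  /* was: tlb->f[mmu_idx].mask = 0;     */
        fast->table = NULL;              /* was: tlb->f[mmu_idx].table = NULL; */
    }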
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
15
+++ b/accel/tcg/cputlb.c
16
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
17
18
/**
19
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
20
- * @env: CPU that owns the TLB
21
- * @mmu_idx: MMU index of the TLB
22
+ * @desc: The CPUTLBDesc portion of the TLB
23
+ * @fast: The CPUTLBDescFast portion of the same TLB
24
*
25
* Called with tlb_lock_held.
26
*
27
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
28
* high), since otherwise we are likely to have a significant amount of
29
* conflict misses.
30
*/
31
-static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
32
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
33
{
34
- CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
35
- size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
36
+ size_t old_size = tlb_n_entries(fast);
37
size_t rate;
38
size_t new_size = old_size;
39
int64_t now = get_clock_realtime();
40
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
41
return;
42
}
43
44
- g_free(env_tlb(env)->f[mmu_idx].table);
45
- g_free(env_tlb(env)->d[mmu_idx].iotlb);
46
+ g_free(fast->table);
47
+ g_free(desc->iotlb);
48
49
tlb_window_reset(desc, now, 0);
50
/* desc->n_used_entries is cleared by the caller */
51
- env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
52
- env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
53
- env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
54
+ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
55
+ fast->table = g_try_new(CPUTLBEntry, new_size);
56
+ desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
57
+
58
/*
59
* If the allocations fail, try smaller sizes. We just freed some
60
* memory, so going back to half of new_size has a good chance of working.
61
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
62
* allocations to fail though, so we progressively reduce the allocation
63
* size, aborting if we cannot even allocate the smallest TLB we support.
64
*/
65
- while (env_tlb(env)->f[mmu_idx].table == NULL ||
66
- env_tlb(env)->d[mmu_idx].iotlb == NULL) {
67
+ while (fast->table == NULL || desc->iotlb == NULL) {
68
if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
69
error_report("%s: %s", __func__, strerror(errno));
70
abort();
71
}
72
new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
73
- env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
74
+ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
75
76
- g_free(env_tlb(env)->f[mmu_idx].table);
77
- g_free(env_tlb(env)->d[mmu_idx].iotlb);
78
- env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
79
- env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
80
+ g_free(fast->table);
81
+ g_free(desc->iotlb);
82
+ fast->table = g_try_new(CPUTLBEntry, new_size);
83
+ desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
84
}
85
}
86
87
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
88
{
89
- tlb_mmu_resize_locked(env, mmu_idx);
90
+ tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
91
env_tlb(env)->d[mmu_idx].n_used_entries = 0;
92
env_tlb(env)->d[mmu_idx].large_page_addr = -1;
93
env_tlb(env)->d[mmu_idx].large_page_mask = -1;
94
--
95
2.20.1
96
97
Deleted patch
No functional change, but the smaller expressions make
the code easier to read.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
15
+++ b/accel/tcg/cputlb.c
16
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
17
18
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
19
{
20
- tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
21
- env_tlb(env)->d[mmu_idx].n_used_entries = 0;
22
- env_tlb(env)->d[mmu_idx].large_page_addr = -1;
23
- env_tlb(env)->d[mmu_idx].large_page_mask = -1;
24
- env_tlb(env)->d[mmu_idx].vindex = 0;
25
- memset(env_tlb(env)->f[mmu_idx].table, -1,
26
- sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
27
- memset(env_tlb(env)->d[mmu_idx].vtable, -1,
28
- sizeof(env_tlb(env)->d[0].vtable));
29
+ CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
30
+ CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
31
+
32
+ tlb_mmu_resize_locked(desc, fast);
33
+ desc->n_used_entries = 0;
34
+ desc->large_page_addr = -1;
35
+ desc->large_page_mask = -1;
36
+ desc->vindex = 0;
37
+ memset(fast->table, -1, sizeof_tlb(fast));
38
+ memset(desc->vtable, -1, sizeof(desc->vtable));
39
}
40
41
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
42
--
43
2.20.1
44
45
Deleted patch
We will want to be able to flush a tlb without resizing.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
14
+++ b/accel/tcg/cputlb.c
15
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
16
}
17
}
18
19
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
20
+static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
21
{
22
- CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
23
- CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
24
-
25
- tlb_mmu_resize_locked(desc, fast);
26
desc->n_used_entries = 0;
27
desc->large_page_addr = -1;
28
desc->large_page_mask = -1;
29
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
30
memset(desc->vtable, -1, sizeof(desc->vtable));
31
}
32
33
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
34
+{
35
+ CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
36
+ CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
37
+
38
+ tlb_mmu_resize_locked(desc, fast);
39
+ tlb_mmu_flush_locked(desc, fast);
40
+}
41
+
42
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
43
{
44
env_tlb(env)->d[mmu_idx].n_used_entries++;
45
--
46
2.20.1
47
48
Merge into the only caller, but at the same time split
out tlb_mmu_init to initialize a single tlb entry.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)


Bitwise operations are easy to fold, because the operation is
identical regardless of element size.  But add and sub need
extra element size info that is not currently propagated.

Fixes: 2f9f08ba43d
Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/799
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 11 deletions(-)
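A standalone arithmetic check of why constant folding of vector add/sub
needs the element size while the bitwise ops do not (illustration only,
not tcg/optimize.c code): treating two 8-bit lanes as one 16-bit integer
lets the carry ripple across the lane boundary.

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint16_t x = 0x00ff, y = 0x0001;

        /* Bitwise OR: the same answer whether folded as one 16-bit op
           or as two independent 8-bit lanes. */
        assert((uint16_t)(x | y) == 0x00ff);

        /* Add folded as a single 16-bit value: the carry crosses lanes. */
        assert((uint16_t)(x + y) == 0x0100);

        /* Add done per 8-bit lane, as a vector op must: no carry ripple. */
        uint8_t lo = (uint8_t)((x & 0xff) + (y & 0xff));   /* 0x00 */
        uint8_t hi = (uint8_t)((x >> 8) + (y >> 8));       /* 0x00 */
        assert((uint16_t)((hi << 8) | lo) == 0x0000);
        return 0;
    }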
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
16
--- a/tcg/optimize.c
15
+++ b/accel/tcg/cputlb.c
17
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
18
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
17
desc->window_max_entries = max_entries;
19
CASE_OP_32_64(mul):
20
return x * y;
21
22
- CASE_OP_32_64(and):
23
+ CASE_OP_32_64_VEC(and):
24
return x & y;
25
26
- CASE_OP_32_64(or):
27
+ CASE_OP_32_64_VEC(or):
28
return x | y;
29
30
- CASE_OP_32_64(xor):
31
+ CASE_OP_32_64_VEC(xor):
32
return x ^ y;
33
34
case INDEX_op_shl_i32:
35
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
36
case INDEX_op_rotl_i64:
37
return rol64(x, y & 63);
38
39
- CASE_OP_32_64(not):
40
+ CASE_OP_32_64_VEC(not):
41
return ~x;
42
43
CASE_OP_32_64(neg):
44
return -x;
45
46
- CASE_OP_32_64(andc):
47
+ CASE_OP_32_64_VEC(andc):
48
return x & ~y;
49
50
- CASE_OP_32_64(orc):
51
+ CASE_OP_32_64_VEC(orc):
52
return x | ~y;
53
54
CASE_OP_32_64(eqv):
55
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
56
return false;
18
}
57
}
19
58
20
-static void tlb_dyn_init(CPUArchState *env)
59
+static bool fold_commutative(OptContext *ctx, TCGOp *op)
21
-{
22
- int i;
23
-
24
- for (i = 0; i < NB_MMU_MODES; i++) {
25
- CPUTLBDesc *desc = &env_tlb(env)->d[i];
26
- size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
27
-
28
- tlb_window_reset(desc, get_clock_realtime(), 0);
29
- desc->n_used_entries = 0;
30
- env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
31
- env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
32
- env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
33
- }
34
-}
35
-
36
/**
37
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
38
* @desc: The CPUTLBDesc portion of the TLB
39
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
40
tlb_mmu_flush_locked(desc, fast);
41
}
42
43
+static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
44
+{
60
+{
45
+ size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
61
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
46
+
62
+ return false;
47
+ tlb_window_reset(desc, now, 0);
48
+ desc->n_used_entries = 0;
49
+ fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
50
+ fast->table = g_new(CPUTLBEntry, n_entries);
51
+ desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
52
+}
63
+}
53
+
64
+
54
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
65
static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
55
{
66
{
56
env_tlb(env)->d[mmu_idx].n_used_entries++;
67
swap_commutative(op->args[0], &op->args[1], &op->args[2]);
57
@@ -XXX,XX +XXX,XX @@ static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
68
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
58
void tlb_init(CPUState *cpu)
69
return false;
70
}
71
72
+/* We cannot as yet do_constant_folding with vectors. */
73
+static bool fold_add_vec(OptContext *ctx, TCGOp *op)
74
+{
75
+ if (fold_commutative(ctx, op) ||
76
+ fold_xi_to_x(ctx, op, 0)) {
77
+ return true;
78
+ }
79
+ return false;
80
+}
81
+
82
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
59
{
83
{
60
CPUArchState *env = cpu->env_ptr;
84
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
61
+ int64_t now = get_clock_realtime();
85
@@ -XXX,XX +XXX,XX @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
62
+ int i;
86
return false;
63
64
qemu_spin_init(&env_tlb(env)->c.lock);
65
66
/* Ensure that cpu_reset performs a full flush. */
67
env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
68
69
- tlb_dyn_init(env);
70
+ for (i = 0; i < NB_MMU_MODES; i++) {
71
+ tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
72
+ }
73
}
87
}
74
88
75
/* flush_all_helper: run fn across all cpus
89
-static bool fold_sub(OptContext *ctx, TCGOp *op)
90
+/* We cannot as yet do_constant_folding with vectors. */
91
+static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
92
{
93
- if (fold_const2(ctx, op) ||
94
- fold_xx_to_i(ctx, op, 0) ||
95
+ if (fold_xx_to_i(ctx, op, 0) ||
96
fold_xi_to_x(ctx, op, 0) ||
97
fold_sub_to_neg(ctx, op)) {
98
return true;
99
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
100
return false;
101
}
102
103
+static bool fold_sub(OptContext *ctx, TCGOp *op)
104
+{
105
+ return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
106
+}
107
+
108
static bool fold_sub2(OptContext *ctx, TCGOp *op)
109
{
110
return fold_addsub2(ctx, op, false);
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
* Sorted alphabetically by opcode as much as possible.
113
*/
114
switch (opc) {
115
- CASE_OP_32_64_VEC(add):
116
+ CASE_OP_32_64(add):
117
done = fold_add(&ctx, op);
118
break;
119
+ case INDEX_op_add_vec:
120
+ done = fold_add_vec(&ctx, op);
121
+ break;
122
CASE_OP_32_64(add2):
123
done = fold_add2(&ctx, op);
124
break;
125
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
126
CASE_OP_32_64(sextract):
127
done = fold_sextract(&ctx, op);
128
break;
129
- CASE_OP_32_64_VEC(sub):
130
+ CASE_OP_32_64(sub):
131
done = fold_sub(&ctx, op);
132
break;
133
+ case INDEX_op_sub_vec:
134
+ done = fold_sub_vec(&ctx, op);
135
+ break;
136
CASE_OP_32_64(sub2):
137
done = fold_sub2(&ctx, op);
138
break;
76
--
139
--
77
2.20.1
140
2.25.1
78
141
79
142
From: Philippe Mathieu-Daudé <philmd@redhat.com>

To avoid scrolling each instruction when reviewing tcg
helpers written for the decodetree script, display the
.decode files (similar to header declarations) before
the C source (implementation of previous declarations).

Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20191230082856.30556-1-philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/git.orderfile | 3 +++
 1 file changed, 3 insertions(+)


From: Philippe Mathieu-Daudé <f4bug@amsat.org>

When building using GCC 8.3.0 on loongarch64 (Loongnix) we get:

  In file included from ../linux-user/signal.c:33:
  ../linux-user/host/loongarch64/host-signal.h: In function ‘host_signal_write’:
  ../linux-user/host/loongarch64/host-signal.h:57:9: error: a label can only be part of a statement and a declaration is not a statement
       uint32_t sel = (insn >> 15) & 0b11111111111;
       ^~~~~~~~

We don't use the 'sel' variable more than once, so drop it.

Meson output for the record:

  Host machine cpu family: loongarch64
  Host machine cpu: loongarch64
  C compiler for the host machine: cc (gcc 8.3.0 "cc (Loongnix 8.3.0-6.lnd.vec.27) 8.3.0")
  C linker for the host machine: cc ld.bfd 2.31.1-system

Fixes: ad812c3bd65 ("linux-user: Implement CPU-specific signal handler for loongarch64 hosts")
Reported-by: Song Gao <gaosong@loongson.cn>
Suggested-by: Song Gao <gaosong@loongson.cn>
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220104215027.2180972-1-f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/host/loongarch64/host-signal.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)
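A self-contained reproduction of the diagnostic class quoted above (the
function and the opcode values are purely illustrative): under GCC 8 a
declaration placed directly after a case label is rejected because a
label must be followed by a statement, so either the declaration is
dropped, as this patch does, or the case body gets its own braces.

    /* What the compiler rejected, in miniature:
     *
     *     case 0x0e:
     *         uint32_t sel = (insn >> 15) & 0x7ff;   // declaration after label
     *         switch (sel) { ... }
     */
    int classify(unsigned int insn)
    {
        switch (insn >> 26) {
        case 0x0e:
            /* No declaration after the label: use the expression directly. */
            switch ((insn >> 15) & 0x7ff) {
            case 0x020:
                return 1;
            default:
                return 0;
            }
        default:
            return -1;
        }
    }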
diff --git a/scripts/git.orderfile b/scripts/git.orderfile
32
diff --git a/linux-user/host/loongarch64/host-signal.h b/linux-user/host/loongarch64/host-signal.h
18
index XXXXXXX..XXXXXXX 100644
33
index XXXXXXX..XXXXXXX 100644
19
--- a/scripts/git.orderfile
34
--- a/linux-user/host/loongarch64/host-signal.h
20
+++ b/scripts/git.orderfile
35
+++ b/linux-user/host/loongarch64/host-signal.h
21
@@ -XXX,XX +XXX,XX @@ qga/*.json
36
@@ -XXX,XX +XXX,XX @@ static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
22
# headers
37
}
23
*.h
38
break;
24
39
case 0b001110: /* indexed, atomic, bounds-checking memory operations */
25
+# decoding tree specification
40
- uint32_t sel = (insn >> 15) & 0b11111111111;
26
+*.decode
41
-
27
+
42
- switch (sel) {
28
# code
43
+ switch ((insn >> 15) & 0b11111111111) {
29
*.c
44
case 0b00000100000: /* stx.b */
45
case 0b00000101000: /* stx.h */
46
case 0b00000110000: /* stx.w */
30
--
47
--
31
2.20.1
48
2.25.1
32
49
33
50
Do not call get_clock_realtime() in tlb_mmu_resize_locked,
but hoist outside of any loop over a set of tlbs.  This is
only two (indirect) callers, tlb_flush_by_mmuidx_async_work
and tlb_flush_page_locked, so not onerous.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)


From: Xiaoyao Li <xiaoyao.li@intel.com>

Remove qemu_run_machine_init_done_notifiers() since no implementation
and user.

Fixes: f66dc8737c9 ("vl: move all generic initialization out of vl.c")
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20220104024136.1433545-1-xiaoyao.li@intel.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/sysemu/sysemu.h | 1 -
 1 file changed, 1 deletion(-)
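The hoisting pattern from the first message above, in a minimal form
(get_clock_realtime() is the QEMU helper named in the message; flush_one()
and the loop body are illustrative stand-ins):

    #include <stdint.h>

    extern int64_t get_clock_realtime(void);
    extern void flush_one(int mmu_idx, int64_t now);

    static void flush_some(uint16_t to_clean)
    {
        int64_t now = get_clock_realtime();   /* taken once, outside the loop */

        for (uint16_t work = to_clean; work != 0; work &= work - 1) {
            flush_one(__builtin_ctz(work), now);   /* lowest set bit = mmu_idx */
        }
    }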
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
15
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/accel/tcg/cputlb.c
17
--- a/include/sysemu/sysemu.h
17
+++ b/accel/tcg/cputlb.c
18
+++ b/include/sysemu/sysemu.h
18
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
19
@@ -XXX,XX +XXX,XX @@ extern bool qemu_uuid_set;
19
* high), since otherwise we are likely to have a significant amount of
20
void qemu_add_exit_notifier(Notifier *notify);
20
* conflict misses.
21
void qemu_remove_exit_notifier(Notifier *notify);
21
*/
22
22
-static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
23
-void qemu_run_machine_init_done_notifiers(void);
23
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
24
void qemu_add_machine_init_done_notifier(Notifier *notify);
24
+ int64_t now)
25
void qemu_remove_machine_init_done_notifier(Notifier *notify);
25
{
26
26
size_t old_size = tlb_n_entries(fast);
27
size_t rate;
28
size_t new_size = old_size;
29
- int64_t now = get_clock_realtime();
30
int64_t window_len_ms = 100;
31
int64_t window_len_ns = window_len_ms * 1000 * 1000;
32
bool window_expired = now > desc->window_begin_ns + window_len_ns;
33
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
34
memset(desc->vtable, -1, sizeof(desc->vtable));
35
}
36
37
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
38
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
39
+ int64_t now)
40
{
41
CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
42
CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
43
44
- tlb_mmu_resize_locked(desc, fast);
45
+ tlb_mmu_resize_locked(desc, fast, now);
46
tlb_mmu_flush_locked(desc, fast);
47
}
48
49
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
50
CPUArchState *env = cpu->env_ptr;
51
uint16_t asked = data.host_int;
52
uint16_t all_dirty, work, to_clean;
53
+ int64_t now = get_clock_realtime();
54
55
assert_cpu_is_self(cpu);
56
57
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
58
59
for (work = to_clean; work != 0; work &= work - 1) {
60
int mmu_idx = ctz32(work);
61
- tlb_flush_one_mmuidx_locked(env, mmu_idx);
62
+ tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
63
}
64
65
qemu_spin_unlock(&env_tlb(env)->c.lock);
66
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
67
tlb_debug("forcing full flush midx %d ("
68
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
69
midx, lp_addr, lp_mask);
70
- tlb_flush_one_mmuidx_locked(env, midx);
71
+ tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
72
} else {
73
if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
74
tlb_n_used_entries_dec(env, midx);
75
--
27
--
76
2.20.1
28
2.25.1
77
29
78
30
There's little point in leaving these data structures half initialized,
and relying on a flush to be done during reset.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)


For the ABIs in which the syscall return register is not
also the first function argument register, move the errno
value into the correct place.

Fixes: a3310c0397e2 ("linux-user: Move syscall error detection into safe_syscall_base")
Reported-by: Laurent Vivier <laurent@vivier.eu>
Tested-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220104190454.542225-1-richard.henderson@linaro.org>
---
 common-user/host/i386/safe-syscall.inc.S   | 1 +
 common-user/host/mips/safe-syscall.inc.S   | 1 +
 common-user/host/x86_64/safe-syscall.inc.S | 1 +
 3 files changed, 3 insertions(+)
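The calling-convention point in the second message, restated with a tiny
C stand-in (set_errno() is hypothetical; only the register observation is
taken from the patch): on x86-64 SysV a function's return value comes back
in %rax while a callee's first argument goes in %rdi, so a hand-written
tail call that forwards the syscall result as an argument needs an
explicit move, e.g. "mov %eax, %edi" before the jmp.

    extern int set_errno(int err);   /* hypothetical tail-call target */

    int finish(int result)
    {
        /* A compiler emits the %eax -> %edi move automatically here;
           hand-written assembly, as in safe-syscall.inc.S, must add it. */
        return set_errno(result);
    }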
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
17
diff --git a/common-user/host/i386/safe-syscall.inc.S b/common-user/host/i386/safe-syscall.inc.S
12
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
19
--- a/common-user/host/i386/safe-syscall.inc.S
14
+++ b/accel/tcg/cputlb.c
20
+++ b/common-user/host/i386/safe-syscall.inc.S
15
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
21
@@ -XXX,XX +XXX,XX @@ safe_syscall_end:
16
fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
22
pop %ebp
17
fast->table = g_new(CPUTLBEntry, n_entries);
23
.cfi_adjust_cfa_offset -4
18
desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
24
.cfi_restore ebp
19
+ tlb_mmu_flush_locked(desc, fast);
25
+ mov %eax, (%esp)
20
}
26
jmp safe_syscall_set_errno_tail
21
27
22
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
28
.cfi_endproc
23
@@ -XXX,XX +XXX,XX @@ void tlb_init(CPUState *cpu)
29
diff --git a/common-user/host/mips/safe-syscall.inc.S b/common-user/host/mips/safe-syscall.inc.S
24
30
index XXXXXXX..XXXXXXX 100644
25
qemu_spin_init(&env_tlb(env)->c.lock);
31
--- a/common-user/host/mips/safe-syscall.inc.S
26
32
+++ b/common-user/host/mips/safe-syscall.inc.S
27
- /* Ensure that cpu_reset performs a full flush. */
33
@@ -XXX,XX +XXX,XX @@ safe_syscall_end:
28
- env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
34
1: USE_ALT_CP(t0)
29
+ /* All tlbs are initialized flushed. */
35
SETUP_GPX(t1)
30
+ env_tlb(env)->c.dirty = 0;
36
SETUP_GPX64(t0, t1)
31
37
+ move a0, v0
32
for (i = 0; i < NB_MMU_MODES; i++) {
38
PTR_LA t9, safe_syscall_set_errno_tail
33
tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
39
jr t9
40
41
diff --git a/common-user/host/x86_64/safe-syscall.inc.S b/common-user/host/x86_64/safe-syscall.inc.S
42
index XXXXXXX..XXXXXXX 100644
43
--- a/common-user/host/x86_64/safe-syscall.inc.S
44
+++ b/common-user/host/x86_64/safe-syscall.inc.S
45
@@ -XXX,XX +XXX,XX @@ safe_syscall_end:
46
1: pop %rbp
47
.cfi_def_cfa_offset 8
48
.cfi_restore rbp
49
+ mov %eax, %edi
50
jmp safe_syscall_set_errno_tail
51
.cfi_endproc
52
34
--
53
--
35
2.20.1
54
2.25.1
36
55
37
56