Series comparison

-[Qemu-devel] [PULL for-4.1 0/7] tcg patch queue
+[PULL 0/3] tcg patch queue
-The following changes since commit 1316b1ddc8a05e418c8134243f8bff8cccbbccb1:
+The following changes since commit 2ecfc0657afa5d29a373271b342f704a1a3c6737:
-  Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (2019-07-12 15:38:22 +0100)
+  Merge remote-tracking branch 'remotes/armbru/tags/pull-misc-2020-12-10' into staging (2020-12-10 17:01:05 +0000)
 are available in the Git repository at:
-  https://github.com/rth7680/qemu.git tags/pull-tcg-20190714
+  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20201210
-for you to fetch changes up to 52ba13f042714c4086416973fb88e2465e0888a1:
+for you to fetch changes up to 9e2658d62ebc23efe7df43fc0e306f129510d874:
-  tcg: Release mmap_lock on translation fault (2019-07-14 12:19:01 +0200)
+  accel/tcg: rename tcg-cpus functions to match module name (2020-12-10 17:44:10 -0600)
 ----------------------------------------------------------------
-Fixes for 3 tcg bugs
+Split CpusAccel for tcg variants
 ----------------------------------------------------------------
-Richard Henderson (7):
+Claudio Fontana (3):
-      tcg: Fix constant folding of INDEX_op_extract2_i32
+      accel/tcg: split CpusAccel into three TCG variants
-      tcg/aarch64: Fix output of extract2 opcodes
+      accel/tcg: split tcg_start_vcpu_thread
-      include/qemu/atomic.h: Add signal_barrier
+      accel/tcg: rename tcg-cpus functions to match module name
       tcg: Introduce set/clear_helper_retaddr
       tcg: Remove cpu_ld*_code_ra
       tcg: Remove duplicate #if !defined(CODE_ACCESS)
       tcg: Release mmap_lock on translation fault
- include/exec/cpu_ldst.h                   | 20 ++++++++
+ accel/tcg/tcg-cpus-icount.h |  17 ++
- include/exec/cpu_ldst_useronly_template.h | 40 ++++++++++------
+ accel/tcg/tcg-cpus-rr.h     |  21 ++
- include/qemu/atomic.h                     | 11 +++++
+ accel/tcg/tcg-cpus.h        |  12 +-
- accel/tcg/user-exec.c                     | 77 +++++++++++++++++++++----------
+ accel/tcg/tcg-all.c         |  13 +-
- target/arm/helper-a64.c                   |  8 ++--
+ accel/tcg/tcg-cpus-icount.c | 147 +++++++++++++
- target/arm/sve_helper.c                   | 43 +++++++++--------
+ accel/tcg/tcg-cpus-mttcg.c  | 140 ++++++++++++
- tcg/aarch64/tcg-target.inc.c              |  2 +-
+ accel/tcg/tcg-cpus-rr.c     | 305 ++++++++++++++++++++++++++
- tcg/optimize.c                            |  4 +-
+ accel/tcg/tcg-cpus.c        | 506 +-------------------------------------------
-files changed, 139 insertions(+), 66 deletions(-)
+ softmmu/icount.c            |   2 +-
  accel/tcg/meson.build       |   9 +-
 files changed, 670 insertions(+), 502 deletions(-)
  create mode 100644 accel/tcg/tcg-cpus-icount.h
  create mode 100644 accel/tcg/tcg-cpus-rr.h
  create mode 100644 accel/tcg/tcg-cpus-icount.c
  create mode 100644 accel/tcg/tcg-cpus-mttcg.c
  create mode 100644 accel/tcg/tcg-cpus-rr.c

-[Qemu-devel] [PULL for-4.1 1/7] tcg: Fix constant folding of INDEX_op_extract2_i32
+Deleted patch
-On a 64-bit host, discard any replications of the 32-bit
-sign bit when performing the shift and merge.
-Fixes: https://bugs.launchpad.net/bugs/1834496
-Tested-by: Christophe Lyon <christophe.lyon@linaro.org>
-Tested-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- tcg/optimize.c | 4 ++--
-file changed, 2 insertions(+), 2 deletions(-)
-diff --git a/tcg/optimize.c b/tcg/optimize.c
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/optimize.c
-+++ b/tcg/optimize.c
-@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
-                 if (opc == INDEX_op_extract2_i64) {
-                     tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
-                 } else {
--                    tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3]));
--                    tmp = (int32_t)tmp;
-+                    tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) |
-+                                    ((uint32_t)v2 << (32 - op->args[3])));
-                 }
-                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
-                 break;
---
-.17.1

-[Qemu-devel] [PULL for-4.1 2/7] tcg/aarch64: Fix output of extract2 opcodes
+Deleted patch
-This patch fixes two problems:
-(1) The inputs to the EXTR insn were reversed,
-(2) The input constraints use rZ, which means that we need to use
-    the REG0 macro in order to supply XZR for a constant 0 input.
-Fixes: 464c2969d5d
-Reported-by: Peter Maydell <peter.maydell@linaro.org>
-Tested-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- tcg/aarch64/tcg-target.inc.c | 2 +-
-file changed, 1 insertion(+), 1 deletion(-)
-diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
-index XXXXXXX..XXXXXXX 100644
---- a/tcg/aarch64/tcg-target.inc.c
-+++ b/tcg/aarch64/tcg-target.inc.c
-@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
-     case INDEX_op_extract2_i64:
-     case INDEX_op_extract2_i32:
--        tcg_out_extr(s, ext, a0, a1, a2, args[3]);
-+        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
-         break;
-     case INDEX_op_add2_i32:
---
-.17.1

-[Qemu-devel] [PULL for-4.1 3/7] include/qemu/atomic.h: Add signal_barrier
+Deleted patch
-We have some potential race conditions vs our user-exec signal
-handler that will be solved with this barrier.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- include/qemu/atomic.h | 11 +++++++++++
-file changed, 11 insertions(+)
-diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/qemu/atomic.h
-+++ b/include/qemu/atomic.h
-@@ -XXX,XX +XXX,XX @@
- #define smp_read_barrier_depends()   barrier()
- #endif
-+/*
-+ * A signal barrier forces all pending local memory ops to be observed before
-+ * a SIGSEGV is delivered to the *same* thread.  In practice this is exactly
-+ * the same as barrier(), but since we have the correct builtin, use it.
-+ */
-+#define signal_barrier()    __atomic_signal_fence(__ATOMIC_SEQ_CST)
-+
- /* Sanity check that the size of an atomic operation isn't "overly large".
-  * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not
-  * want to use them because we ought not need them, and this lets us do a
-@@ -XXX,XX +XXX,XX @@
- #define smp_read_barrier_depends()   barrier()
- #endif
-+#ifndef signal_barrier
-+#define signal_barrier()    barrier()
-+#endif
-+
- /* These will only be atomic if the processor does the fetch or store
-  * in a single issue memory operation
-  */
---
-.17.1

-[Qemu-devel] [PULL for-4.1 4/7] tcg: Introduce set/clear_helper_retaddr
+[PULL 1/3] accel/tcg: split CpusAccel into three TCG variants
-At present we have a potential error in that helper_retaddr contains
+From: Claudio Fontana <cfontana@suse.de>
 data for handle_cpu_signal, but we have not ensured that those stores
 will be scheduled properly before the operation that may fault.
-It might be that these races are not in practice observable, due to
+split up the CpusAccel tcg_cpus into three TCG variants:
 our use of -fno-strict-aliasing, but better safe than sorry.
-Adjust all of the setters of helper_retaddr.
+tcg_cpus_rr (single threaded, round robin cpus)
 tcg_cpus_icount (same as rr, but with instruction counting enabled)
 tcg_cpus_mttcg (multi-threaded cpus)
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Suggested-by: Richard Henderson <richard.henderson@linaro.org>
 Signed-off-by: Claudio Fontana <cfontana@suse.de>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 Message-Id: <20201015143217.29337-2-cfontana@suse.de>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/exec/cpu_ldst.h                   | 20 +++++++++++
+ accel/tcg/tcg-cpus-icount.h |  17 ++
- include/exec/cpu_ldst_useronly_template.h | 12 +++----
+ accel/tcg/tcg-cpus-mttcg.h  |  21 ++
- accel/tcg/user-exec.c                     | 11 +++---
+ accel/tcg/tcg-cpus-rr.h     |  20 ++
- target/arm/helper-a64.c                   |  8 ++---
+ accel/tcg/tcg-cpus.h        |  13 +-
- target/arm/sve_helper.c                   | 43 +++++++++++------------
+ accel/tcg/tcg-all.c         |   8 +-
-files changed, 57 insertions(+), 37 deletions(-)
+ accel/tcg/tcg-cpus-icount.c | 147 +++++++++++
  accel/tcg/tcg-cpus-mttcg.c  | 117 +++++++++
  accel/tcg/tcg-cpus-rr.c     | 270 ++++++++++++++++++++
  accel/tcg/tcg-cpus.c        | 484 ++----------------------------------
  softmmu/icount.c            |   2 +-
  accel/tcg/meson.build       |   9 +-
 files changed, 646 insertions(+), 462 deletions(-)
  create mode 100644 accel/tcg/tcg-cpus-icount.h
  create mode 100644 accel/tcg/tcg-cpus-mttcg.h
  create mode 100644 accel/tcg/tcg-cpus-rr.h
  create mode 100644 accel/tcg/tcg-cpus-icount.c
  create mode 100644 accel/tcg/tcg-cpus-mttcg.c
  create mode 100644 accel/tcg/tcg-cpus-rr.c
-diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
+diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-icount.h
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Single Threaded vCPUs implementation using instruction counting
 + *
 + * Copyright 2020 SUSE LLC
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + */
 +
 +#ifndef TCG_CPUS_ICOUNT_H
 +#define TCG_CPUS_ICOUNT_H
 +
 +void handle_icount_deadline(void);
 +void prepare_icount_for_run(CPUState *cpu);
 +void process_icount_data(CPUState *cpu);
 +
 +#endif /* TCG_CPUS_ICOUNT_H */
 diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-mttcg.h
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Multi Threaded vCPUs implementation
 + *
 + * Copyright 2020 SUSE LLC
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + */
 +
 +#ifndef TCG_CPUS_MTTCG_H
 +#define TCG_CPUS_MTTCG_H
 +
 +/*
 + * In the multi-threaded case each vCPU has its own thread. The TLS
 + * variable current_cpu can be used deep in the code to find the
 + * current CPUState for a given thread.
 + */
 +
 +void *tcg_cpu_thread_fn(void *arg);
 +
 +#endif /* TCG_CPUS_MTTCG_H */
 diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Single Threaded vCPUs implementation
 + *
 + * Copyright 2020 SUSE LLC
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + */
 +
 +#ifndef TCG_CPUS_RR_H
 +#define TCG_CPUS_RR_H
 +
 +#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 +
 +/* Kick all RR vCPUs. */
 +void qemu_cpu_kick_rr_cpus(CPUState *unused);
 +
 +void *tcg_rr_cpu_thread_fn(void *arg);
 +
 +#endif /* TCG_CPUS_RR_H */
 diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
 index XXXXXXX..XXXXXXX 100644
---- a/include/exec/cpu_ldst.h
+--- a/accel/tcg/tcg-cpus.h
-+++ b/include/exec/cpu_ldst.h
++++ b/accel/tcg/tcg-cpus.h
-@@ -XXX,XX +XXX,XX @@ typedef target_ulong abi_ptr;
+@@ -XXX,XX +XXX,XX @@
+ /*
- extern __thread uintptr_t helper_retaddr;
+- * Accelerator CPUS Interface
++ * QEMU TCG vCPU common functionality
-+static inline void set_helper_retaddr(uintptr_t ra)
++ *
-+{
++ * Functionality common to all TCG vcpu variants: mttcg, rr and icount.
-+    helper_retaddr = ra;
+  *
   * Copyright 2020 SUSE LLC
   *
@@ -XXX,XX +XXX,XX @@
  #include "sysemu/cpus.h"
 -extern const CpusAccel tcg_cpus;
 +extern const CpusAccel tcg_cpus_mttcg;
 +extern const CpusAccel tcg_cpus_icount;
 +extern const CpusAccel tcg_cpus_rr;
 +
 +void tcg_start_vcpu_thread(CPUState *cpu);
 +void qemu_tcg_destroy_vcpu(CPUState *cpu);
 +int tcg_cpu_exec(CPUState *cpu);
 +void tcg_handle_interrupt(CPUState *cpu, int mask);
  #endif /* TCG_CPUS_H */
 diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-all.c
 +++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
      tcg_exec_init(s->tb_size * 1024 * 1024);
      mttcg_enabled = s->mttcg_enabled;
 -    cpus_register_accel(&tcg_cpus);
 +    if (mttcg_enabled) {
 +        cpus_register_accel(&tcg_cpus_mttcg);
 +    } else if (icount_enabled()) {
 +        cpus_register_accel(&tcg_cpus_icount);
 +    } else {
 +        cpus_register_accel(&tcg_cpus_rr);
 +    }
      return 0;
  }
 diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Single Threaded vCPUs implementation using instruction counting
 + *
 + * Copyright (c) 2003-2008 Fabrice Bellard
 + * Copyright (c) 2014 Red Hat Inc.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a copy
 + * of this software and associated documentation files (the "Software"), to deal
 + * in the Software without restriction, including without limitation the rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included in
 + * all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 + * THE SOFTWARE.
 + */
 +
 +#include "qemu/osdep.h"
 +#include "qemu-common.h"
 +#include "sysemu/tcg.h"
 +#include "sysemu/replay.h"
 +#include "qemu/main-loop.h"
 +#include "qemu/guest-random.h"
 +#include "exec/exec-all.h"
 +#include "hw/boards.h"
 +
 +#include "tcg-cpus.h"
 +#include "tcg-cpus-icount.h"
 +#include "tcg-cpus-rr.h"
 +
 +static int64_t tcg_get_icount_limit(void)
 +{
 +    int64_t deadline;
 +
 +    if (replay_mode != REPLAY_MODE_PLAY) {
 +        /*
 +         * Include all the timers, because they may need attention.
 +         * Too long CPU execution may create unnecessary delay in UI.
 +         */
 +        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
 +                                              QEMU_TIMER_ATTR_ALL);
 +        /* Check realtime timers, because they help with input processing */
 +        deadline = qemu_soonest_timeout(deadline,
 +                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
 +                                           QEMU_TIMER_ATTR_ALL));
 +
 +        /*
 +         * Maintain prior (possibly buggy) behaviour where if no deadline
 +         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
 +         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
 +         * nanoseconds.
 +         */
 +        if ((deadline < 0) || (deadline > INT32_MAX)) {
 +            deadline = INT32_MAX;
 +        }
 +
 +        return icount_round(deadline);
 +    } else {
 +        return replay_get_instructions();
 +    }
 +}
 +
 +static void notify_aio_contexts(void)
 +{
 +    /* Wake up other AioContexts.  */
 +    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 +    qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 +}
 +
 +void handle_icount_deadline(void)
 +{
 +    assert(qemu_in_vcpu_thread());
 +    int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
 +                                                  QEMU_TIMER_ATTR_ALL);
 +
 +    if (deadline == 0) {
 +        notify_aio_contexts();
 +    }
 +}
 +
 +void prepare_icount_for_run(CPUState *cpu)
 +{
 +    int insns_left;
 +
 +    /*
-+     * Ensure that this write is visible to the SIGSEGV handler that
++     * These should always be cleared by process_icount_data after
-+     * may be invoked due to a subsequent invalid memory operation.
++     * each vCPU execution. However u16.high can be raised
 +     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
 +     */
-+    signal_barrier();
++    g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
-+}
++    g_assert(cpu->icount_extra == 0);
 +
-+static inline void clear_helper_retaddr(void)
++    cpu->icount_budget = tcg_get_icount_limit();
-+{
++    insns_left = MIN(0xffff, cpu->icount_budget);
-+    /*
++    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
-+     * Ensure that previous memory operations have succeeded before
++    cpu->icount_extra = cpu->icount_budget - insns_left;
-+     * removing the data visible to the signal handler.
++
-+     */
++    replay_mutex_lock();
-+    signal_barrier();
++
-+    helper_retaddr = 0;
++    if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
-+}
++        notify_aio_contexts();
-+
++    }
- /* In user-only mode we provide only the _code and _data accessors. */
++}
++
- #define MEMSUFFIX _data
++void process_icount_data(CPUState *cpu)
-diff --git a/include/exec/cpu_ldst_useronly_template.h b/include/exec/cpu_ldst_useronly_template.h
++{
 +    /* Account for executed instructions */
 +    icount_update(cpu);
 +
 +    /* Reset the counters */
 +    cpu_neg(cpu)->icount_decr.u16.low = 0;
 +    cpu->icount_extra = 0;
 +    cpu->icount_budget = 0;
 +
 +    replay_account_executed_instructions();
 +
 +    replay_mutex_unlock();
 +}
 +
 +static void icount_handle_interrupt(CPUState *cpu, int mask)
 +{
 +    int old_mask = cpu->interrupt_request;
 +
 +    tcg_handle_interrupt(cpu, mask);
 +    if (qemu_cpu_is_self(cpu) &&
 +        !cpu->can_do_io
 +        && (mask & ~old_mask) != 0) {
 +        cpu_abort(cpu, "Raised interrupt while not in I/O function");
 +    }
 +}
 +
 +const CpusAccel tcg_cpus_icount = {
 +    .create_vcpu_thread = tcg_start_vcpu_thread,
 +    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 +
 +    .handle_interrupt = icount_handle_interrupt,
 +    .get_virtual_clock = icount_get,
 +    .get_elapsed_ticks = icount_get,
 +};
 diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Multi Threaded vCPUs implementation
 + *
 + * Copyright (c) 2003-2008 Fabrice Bellard
 + * Copyright (c) 2014 Red Hat Inc.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a copy
 + * of this software and associated documentation files (the "Software"), to deal
 + * in the Software without restriction, including without limitation the rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included in
 + * all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 + * THE SOFTWARE.
 + */
 +
 +#include "qemu/osdep.h"
 +#include "qemu-common.h"
 +#include "sysemu/tcg.h"
 +#include "sysemu/replay.h"
 +#include "qemu/main-loop.h"
 +#include "qemu/guest-random.h"
 +#include "exec/exec-all.h"
 +#include "hw/boards.h"
 +
 +#include "tcg-cpus.h"
 +#include "tcg-cpus-mttcg.h"
 +
 +/*
 + * In the multi-threaded case each vCPU has its own thread. The TLS
 + * variable current_cpu can be used deep in the code to find the
 + * current CPUState for a given thread.
 + */
 +
 +void *tcg_cpu_thread_fn(void *arg)
 +{
 +    CPUState *cpu = arg;
 +
 +    assert(tcg_enabled());
 +    g_assert(!icount_enabled());
 +
 +    rcu_register_thread();
 +    tcg_register_thread();
 +
 +    qemu_mutex_lock_iothread();
 +    qemu_thread_get_self(cpu->thread);
 +
 +    cpu->thread_id = qemu_get_thread_id();
 +    cpu->can_do_io = 1;
 +    current_cpu = cpu;
 +    cpu_thread_signal_created(cpu);
 +    qemu_guest_random_seed_thread_part2(cpu->random_seed);
 +
 +    /* process any pending work */
 +    cpu->exit_request = 1;
 +
 +    do {
 +        if (cpu_can_run(cpu)) {
 +            int r;
 +            qemu_mutex_unlock_iothread();
 +            r = tcg_cpu_exec(cpu);
 +            qemu_mutex_lock_iothread();
 +            switch (r) {
 +            case EXCP_DEBUG:
 +                cpu_handle_guest_debug(cpu);
 +                break;
 +            case EXCP_HALTED:
 +                /*
 +                 * during start-up the vCPU is reset and the thread is
 +                 * kicked several times. If we don't ensure we go back
 +                 * to sleep in the halted state we won't cleanly
 +                 * start-up when the vCPU is enabled.
 +                 *
 +                 * cpu->halted should ensure we sleep in wait_io_event
 +                 */
 +                g_assert(cpu->halted);
 +                break;
 +            case EXCP_ATOMIC:
 +                qemu_mutex_unlock_iothread();
 +                cpu_exec_step_atomic(cpu);
 +                qemu_mutex_lock_iothread();
 +            default:
 +                /* Ignore everything else? */
 +                break;
 +            }
 +        }
 +
 +        qatomic_mb_set(&cpu->exit_request, 0);
 +        qemu_wait_io_event(cpu);
 +    } while (!cpu->unplug || cpu_can_run(cpu));
 +
 +    qemu_tcg_destroy_vcpu(cpu);
 +    qemu_mutex_unlock_iothread();
 +    rcu_unregister_thread();
 +    return NULL;
 +}
 +
 +static void mttcg_kick_vcpu_thread(CPUState *cpu)
 +{
 +    cpu_exit(cpu);
 +}
 +
 +const CpusAccel tcg_cpus_mttcg = {
 +    .create_vcpu_thread = tcg_start_vcpu_thread,
 +    .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 +
 +    .handle_interrupt = tcg_handle_interrupt,
 +};
 diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Single Threaded vCPUs implementation
 + *
 + * Copyright (c) 2003-2008 Fabrice Bellard
 + * Copyright (c) 2014 Red Hat Inc.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a copy
 + * of this software and associated documentation files (the "Software"), to deal
 + * in the Software without restriction, including without limitation the rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included in
 + * all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 + * THE SOFTWARE.
 + */
 +
 +#include "qemu/osdep.h"
 +#include "qemu-common.h"
 +#include "sysemu/tcg.h"
 +#include "sysemu/replay.h"
 +#include "qemu/main-loop.h"
 +#include "qemu/guest-random.h"
 +#include "exec/exec-all.h"
 +#include "hw/boards.h"
 +
 +#include "tcg-cpus.h"
 +#include "tcg-cpus-rr.h"
 +#include "tcg-cpus-icount.h"
 +
 +/* Kick all RR vCPUs */
 +void qemu_cpu_kick_rr_cpus(CPUState *unused)
 +{
 +    CPUState *cpu;
 +
 +    CPU_FOREACH(cpu) {
 +        cpu_exit(cpu);
 +    };
 +}
 +
 +/*
 + * TCG vCPU kick timer
 + *
 + * The kick timer is responsible for moving single threaded vCPU
 + * emulation on to the next vCPU. If more than one vCPU is running a
 + * timer event will force a cpu->exit so the next vCPU can get
 + * scheduled.
 + *
 + * The timer is removed if all vCPUs are idle and restarted again once
 + * idleness is complete.
 + */
 +
 +static QEMUTimer *tcg_kick_vcpu_timer;
 +static CPUState *tcg_current_rr_cpu;
 +
 +#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 +
 +static inline int64_t qemu_tcg_next_kick(void)
 +{
 +    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 +}
 +
 +/* Kick the currently round-robin scheduled vCPU to next */
 +static void qemu_cpu_kick_rr_next_cpu(void)
 +{
 +    CPUState *cpu;
 +    do {
 +        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
 +        if (cpu) {
 +            cpu_exit(cpu);
 +        }
 +    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
 +}
 +
 +static void kick_tcg_thread(void *opaque)
 +{
 +    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 +    qemu_cpu_kick_rr_next_cpu();
 +}
 +
 +static void start_tcg_kick_timer(void)
 +{
 +    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 +        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 +                                           kick_tcg_thread, NULL);
 +    }
 +    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
 +        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 +    }
 +}
 +
 +static void stop_tcg_kick_timer(void)
 +{
 +    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
 +        timer_del(tcg_kick_vcpu_timer);
 +    }
 +}
 +
 +static void qemu_tcg_rr_wait_io_event(void)
 +{
 +    CPUState *cpu;
 +
 +    while (all_cpu_threads_idle()) {
 +        stop_tcg_kick_timer();
 +        qemu_cond_wait_iothread(first_cpu->halt_cond);
 +    }
 +
 +    start_tcg_kick_timer();
 +
 +    CPU_FOREACH(cpu) {
 +        qemu_wait_io_event_common(cpu);
 +    }
 +}
 +
 +/*
 + * Destroy any remaining vCPUs which have been unplugged and have
 + * finished running
 + */
 +static void deal_with_unplugged_cpus(void)
 +{
 +    CPUState *cpu;
 +
 +    CPU_FOREACH(cpu) {
 +        if (cpu->unplug && !cpu_can_run(cpu)) {
 +            qemu_tcg_destroy_vcpu(cpu);
 +            break;
 +        }
 +    }
 +}
 +
 +/*
 + * In the single-threaded case each vCPU is simulated in turn. If
 + * there is more than a single vCPU we create a simple timer to kick
 + * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 + * This is done explicitly rather than relying on side-effects
 + * elsewhere.
 + */
 +
 +void *tcg_rr_cpu_thread_fn(void *arg)
 +{
 +    CPUState *cpu = arg;
 +
 +    assert(tcg_enabled());
 +    rcu_register_thread();
 +    tcg_register_thread();
 +
 +    qemu_mutex_lock_iothread();
 +    qemu_thread_get_self(cpu->thread);
 +
 +    cpu->thread_id = qemu_get_thread_id();
 +    cpu->can_do_io = 1;
 +    cpu_thread_signal_created(cpu);
 +    qemu_guest_random_seed_thread_part2(cpu->random_seed);
 +
 +    /* wait for initial kick-off after machine start */
 +    while (first_cpu->stopped) {
 +        qemu_cond_wait_iothread(first_cpu->halt_cond);
 +
 +        /* process any pending work */
 +        CPU_FOREACH(cpu) {
 +            current_cpu = cpu;
 +            qemu_wait_io_event_common(cpu);
 +        }
 +    }
 +
 +    start_tcg_kick_timer();
 +
 +    cpu = first_cpu;
 +
 +    /* process any pending work */
 +    cpu->exit_request = 1;
 +
 +    while (1) {
 +        qemu_mutex_unlock_iothread();
 +        replay_mutex_lock();
 +        qemu_mutex_lock_iothread();
 +
 +        if (icount_enabled()) {
 +            /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
 +            icount_account_warp_timer();
 +            /*
 +             * Run the timers here.  This is much more efficient than
 +             * waking up the I/O thread and waiting for completion.
 +             */
 +            handle_icount_deadline();
 +        }
 +
 +        replay_mutex_unlock();
 +
 +        if (!cpu) {
 +            cpu = first_cpu;
 +        }
 +
 +        while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
 +
 +            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
 +            current_cpu = cpu;
 +
 +            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
 +                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
 +
 +            if (cpu_can_run(cpu)) {
 +                int r;
 +
 +                qemu_mutex_unlock_iothread();
 +                if (icount_enabled()) {
 +                    prepare_icount_for_run(cpu);
 +                }
 +                r = tcg_cpu_exec(cpu);
 +                if (icount_enabled()) {
 +                    process_icount_data(cpu);
 +                }
 +                qemu_mutex_lock_iothread();
 +
 +                if (r == EXCP_DEBUG) {
 +                    cpu_handle_guest_debug(cpu);
 +                    break;
 +                } else if (r == EXCP_ATOMIC) {
 +                    qemu_mutex_unlock_iothread();
 +                    cpu_exec_step_atomic(cpu);
 +                    qemu_mutex_lock_iothread();
 +                    break;
 +                }
 +            } else if (cpu->stop) {
 +                if (cpu->unplug) {
 +                    cpu = CPU_NEXT(cpu);
 +                }
 +                break;
 +            }
 +
 +            cpu = CPU_NEXT(cpu);
 +        } /* while (cpu && !cpu->exit_request).. */
 +
 +        /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
 +        qatomic_set(&tcg_current_rr_cpu, NULL);
 +
 +        if (cpu && cpu->exit_request) {
 +            qatomic_mb_set(&cpu->exit_request, 0);
 +        }
 +
 +        if (icount_enabled() && all_cpu_threads_idle()) {
 +            /*
 +             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
 +             * in the main_loop, wake it up in order to start the warp timer.
 +             */
 +            qemu_notify_event();
 +        }
 +
 +        qemu_tcg_rr_wait_io_event();
 +        deal_with_unplugged_cpus();
 +    }
 +
 +    rcu_unregister_thread();
 +    return NULL;
 +}
 +
 +const CpusAccel tcg_cpus_rr = {
 +    .create_vcpu_thread = tcg_start_vcpu_thread,
 +    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 +
 +    .handle_interrupt = tcg_handle_interrupt,
 +};
 diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
 index XXXXXXX..XXXXXXX 100644
---- a/include/exec/cpu_ldst_useronly_template.h
+--- a/accel/tcg/tcg-cpus.c
-+++ b/include/exec/cpu_ldst_useronly_template.h
++++ b/accel/tcg/tcg-cpus.c
-@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
+@@ -XXX,XX +XXX,XX @@
-                                                   uintptr_t retaddr)
+ /*
 - * QEMU System Emulator
 + * QEMU TCG vCPU common functionality
 + *
 + * Functionality common to all TCG vCPU variants: mttcg, rr and icount.
   *
   * Copyright (c) 2003-2008 Fabrice Bellard
   * Copyright (c) 2014 Red Hat Inc.
@@ -XXX,XX +XXX,XX @@
  #include "hw/boards.h"
  #include "tcg-cpus.h"
 +#include "tcg-cpus-mttcg.h"
 +#include "tcg-cpus-rr.h"
 -/* Kick all RR vCPUs */
 -static void qemu_cpu_kick_rr_cpus(void)
 -{
 -    CPUState *cpu;
 +/* common functionality among all TCG variants */
 -    CPU_FOREACH(cpu) {
 -        cpu_exit(cpu);
 -    };
 -}
 -
 -static void tcg_kick_vcpu_thread(CPUState *cpu)
 -{
 -    if (qemu_tcg_mttcg_enabled()) {
 -        cpu_exit(cpu);
 -    } else {
 -        qemu_cpu_kick_rr_cpus();
 -    }
 -}
 -
 -/*
 - * TCG vCPU kick timer
 - *
 - * The kick timer is responsible for moving single threaded vCPU
 - * emulation on to the next vCPU. If more than one vCPU is running a
 - * timer event with force a cpu->exit so the next vCPU can get
 - * scheduled.
 - *
 - * The timer is removed if all vCPUs are idle and restarted again once
 - * idleness is complete.
 - */
 -
 -static QEMUTimer *tcg_kick_vcpu_timer;
 -static CPUState *tcg_current_rr_cpu;
 -
 -#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 -
 -static inline int64_t qemu_tcg_next_kick(void)
 -{
 -    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 -}
 -
 -/* Kick the currently round-robin scheduled vCPU to next */
 -static void qemu_cpu_kick_rr_next_cpu(void)
 -{
 -    CPUState *cpu;
 -    do {
 -        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
 -        if (cpu) {
 -            cpu_exit(cpu);
 -        }
 -    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
 -}
 -
 -static void kick_tcg_thread(void *opaque)
 -{
 -    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 -    qemu_cpu_kick_rr_next_cpu();
 -}
 -
 -static void start_tcg_kick_timer(void)
 -{
 -    assert(!mttcg_enabled);
 -    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 -        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 -                                           kick_tcg_thread, NULL);
 -    }
 -    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
 -        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 -    }
 -}
 -
 -static void stop_tcg_kick_timer(void)
 -{
 -    assert(!mttcg_enabled);
 -    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
 -        timer_del(tcg_kick_vcpu_timer);
 -    }
 -}
 -
 -static void qemu_tcg_destroy_vcpu(CPUState *cpu)
 -{
 -}
 -
 -static void qemu_tcg_rr_wait_io_event(void)
 -{
 -    CPUState *cpu;
 -
 -    while (all_cpu_threads_idle()) {
 -        stop_tcg_kick_timer();
 -        qemu_cond_wait_iothread(first_cpu->halt_cond);
 -    }
 -
 -    start_tcg_kick_timer();
 -
 -    CPU_FOREACH(cpu) {
 -        qemu_wait_io_event_common(cpu);
 -    }
 -}
 -
 -static int64_t tcg_get_icount_limit(void)
 -{
 -    int64_t deadline;
 -
 -    if (replay_mode != REPLAY_MODE_PLAY) {
 -        /*
 -         * Include all the timers, because they may need an attention.
 -         * Too long CPU execution may create unnecessary delay in UI.
 -         */
 -        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
 -                                              QEMU_TIMER_ATTR_ALL);
 -        /* Check realtime timers, because they help with input processing */
 -        deadline = qemu_soonest_timeout(deadline,
 -                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
 -                                           QEMU_TIMER_ATTR_ALL));
 -
 -        /*
 -         * Maintain prior (possibly buggy) behaviour where if no deadline
 -         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
 -         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
 -         * nanoseconds.
 -         */
 -        if ((deadline < 0) || (deadline > INT32_MAX)) {
 -            deadline = INT32_MAX;
 -        }
 -
 -        return icount_round(deadline);
 -    } else {
 -        return replay_get_instructions();
 -    }
 -}
 -
 -static void notify_aio_contexts(void)
 -{
 -    /* Wake up other AioContexts.  */
 -    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 -    qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 -}
 -
 -static void handle_icount_deadline(void)
 -{
 -    assert(qemu_in_vcpu_thread());
 -    if (icount_enabled()) {
 -        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
 -                                                      QEMU_TIMER_ATTR_ALL);
 -
 -        if (deadline == 0) {
 -            notify_aio_contexts();
 -        }
 -    }
 -}
 -
 -static void prepare_icount_for_run(CPUState *cpu)
 -{
 -    if (icount_enabled()) {
 -        int insns_left;
 -
 -        /*
 -         * These should always be cleared by process_icount_data after
 -         * each vCPU execution. However u16.high can be raised
 -         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
 -         */
 -        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
 -        g_assert(cpu->icount_extra == 0);
 -
 -        cpu->icount_budget = tcg_get_icount_limit();
 -        insns_left = MIN(0xffff, cpu->icount_budget);
 -        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
 -        cpu->icount_extra = cpu->icount_budget - insns_left;
 -
 -        replay_mutex_lock();
 -
 -        if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
 -            notify_aio_contexts();
 -        }
 -    }
 -}
 -
 -static void process_icount_data(CPUState *cpu)
 -{
 -    if (icount_enabled()) {
 -        /* Account for executed instructions */
 -        icount_update(cpu);
 -
 -        /* Reset the counters */
 -        cpu_neg(cpu)->icount_decr.u16.low = 0;
 -        cpu->icount_extra = 0;
 -        cpu->icount_budget = 0;
 -
 -        replay_account_executed_instructions();
 -
 -        replay_mutex_unlock();
 -    }
 -}
 -
 -static int tcg_cpu_exec(CPUState *cpu)
 -{
 -    int ret;
 -#ifdef CONFIG_PROFILER
 -    int64_t ti;
 -#endif
 -
 -    assert(tcg_enabled());
 -#ifdef CONFIG_PROFILER
 -    ti = profile_getclock();
 -#endif
 -    cpu_exec_start(cpu);
 -    ret = cpu_exec(cpu);
 -    cpu_exec_end(cpu);
 -#ifdef CONFIG_PROFILER
 -    qatomic_set(&tcg_ctx->prof.cpu_exec_time,
 -                tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
 -#endif
 -    return ret;
 -}
 -
 -/*
 - * Destroy any remaining vCPUs which have been unplugged and have
 - * finished running
 - */
 -static void deal_with_unplugged_cpus(void)
 -{
 -    CPUState *cpu;
 -
 -    CPU_FOREACH(cpu) {
 -        if (cpu->unplug && !cpu_can_run(cpu)) {
 -            qemu_tcg_destroy_vcpu(cpu);
 -            cpu_thread_signal_destroyed(cpu);
 -            break;
 -        }
 -    }
 -}
 -
 -/*
 - * Single-threaded TCG
 - *
 - * In the single-threaded case each vCPU is simulated in turn. If
 - * there is more than a single vCPU we create a simple timer to kick
 - * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 - * This is done explicitly rather than relying on side-effects
 - * elsewhere.
 - */
 -
 -static void *tcg_rr_cpu_thread_fn(void *arg)
 -{
 -    CPUState *cpu = arg;
 -
 -    assert(tcg_enabled());
 -    rcu_register_thread();
 -    tcg_register_thread();
 -
 -    qemu_mutex_lock_iothread();
 -    qemu_thread_get_self(cpu->thread);
 -
 -    cpu->thread_id = qemu_get_thread_id();
 -    cpu->can_do_io = 1;
 -    cpu_thread_signal_created(cpu);
 -    qemu_guest_random_seed_thread_part2(cpu->random_seed);
 -
 -    /* wait for initial kick-off after machine start */
 -    while (first_cpu->stopped) {
 -        qemu_cond_wait_iothread(first_cpu->halt_cond);
 -
 -        /* process any pending work */
 -        CPU_FOREACH(cpu) {
 -            current_cpu = cpu;
 -            qemu_wait_io_event_common(cpu);
 -        }
 -    }
 -
 -    start_tcg_kick_timer();
 -
 -    cpu = first_cpu;
 -
 -    /* process any pending work */
 -    cpu->exit_request = 1;
 -
 -    while (1) {
 -        qemu_mutex_unlock_iothread();
 -        replay_mutex_lock();
 -        qemu_mutex_lock_iothread();
 -        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
 -        icount_account_warp_timer();
 -
 -        /*
 -         * Run the timers here.  This is much more efficient than
 -         * waking up the I/O thread and waiting for completion.
 -         */
 -        handle_icount_deadline();
 -
 -        replay_mutex_unlock();
 -
 -        if (!cpu) {
 -            cpu = first_cpu;
 -        }
 -
 -        while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
 -
 -            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
 -            current_cpu = cpu;
 -
 -            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
 -                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
 -
 -            if (cpu_can_run(cpu)) {
 -                int r;
 -
 -                qemu_mutex_unlock_iothread();
 -                prepare_icount_for_run(cpu);
 -
 -                r = tcg_cpu_exec(cpu);
 -
 -                process_icount_data(cpu);
 -                qemu_mutex_lock_iothread();
 -
 -                if (r == EXCP_DEBUG) {
 -                    cpu_handle_guest_debug(cpu);
 -                    break;
 -                } else if (r == EXCP_ATOMIC) {
 -                    qemu_mutex_unlock_iothread();
 -                    cpu_exec_step_atomic(cpu);
 -                    qemu_mutex_lock_iothread();
 -                    break;
 -                }
 -            } else if (cpu->stop) {
 -                if (cpu->unplug) {
 -                    cpu = CPU_NEXT(cpu);
 -                }
 -                break;
 -            }
 -
 -            cpu = CPU_NEXT(cpu);
 -        } /* while (cpu && !cpu->exit_request).. */
 -
 -        /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
 -        qatomic_set(&tcg_current_rr_cpu, NULL);
 -
 -        if (cpu && cpu->exit_request) {
 -            qatomic_mb_set(&cpu->exit_request, 0);
 -        }
 -
 -        if (icount_enabled() && all_cpu_threads_idle()) {
 -            /*
 -             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
 -             * in the main_loop, wake it up in order to start the warp timer.
 -             */
 -            qemu_notify_event();
 -        }
 -
 -        qemu_tcg_rr_wait_io_event();
 -        deal_with_unplugged_cpus();
 -    }
 -
 -    rcu_unregister_thread();
 -    return NULL;
 -}
 -
 -/*
 - * Multi-threaded TCG
 - *
 - * In the multi-threaded case each vCPU has its own thread. The TLS
 - * variable current_cpu can be used deep in the code to find the
 - * current CPUState for a given thread.
 - */
 -
 -static void *tcg_cpu_thread_fn(void *arg)
 -{
 -    CPUState *cpu = arg;
 -
 -    assert(tcg_enabled());
 -    g_assert(!icount_enabled());
 -
 -    rcu_register_thread();
 -    tcg_register_thread();
 -
 -    qemu_mutex_lock_iothread();
 -    qemu_thread_get_self(cpu->thread);
 -
 -    cpu->thread_id = qemu_get_thread_id();
 -    cpu->can_do_io = 1;
 -    current_cpu = cpu;
 -    cpu_thread_signal_created(cpu);
 -    qemu_guest_random_seed_thread_part2(cpu->random_seed);
 -
 -    /* process any pending work */
 -    cpu->exit_request = 1;
 -
 -    do {
 -        if (cpu_can_run(cpu)) {
 -            int r;
 -            qemu_mutex_unlock_iothread();
 -            r = tcg_cpu_exec(cpu);
 -            qemu_mutex_lock_iothread();
 -            switch (r) {
 -            case EXCP_DEBUG:
 -                cpu_handle_guest_debug(cpu);
 -                break;
 -            case EXCP_HALTED:
 -                /*
 -                 * during start-up the vCPU is reset and the thread is
 -                 * kicked several times. If we don't ensure we go back
 -                 * to sleep in the halted state we won't cleanly
 -                 * start-up when the vCPU is enabled.
 -                 *
 -                 * cpu->halted should ensure we sleep in wait_io_event
 -                 */
 -                g_assert(cpu->halted);
 -                break;
 -            case EXCP_ATOMIC:
 -                qemu_mutex_unlock_iothread();
 -                cpu_exec_step_atomic(cpu);
 -                qemu_mutex_lock_iothread();
 -            default:
 -                /* Ignore everything else? */
 -                break;
 -            }
 -        }
 -
 -        qatomic_mb_set(&cpu->exit_request, 0);
 -        qemu_wait_io_event(cpu);
 -    } while (!cpu->unplug || cpu_can_run(cpu));
 -
 -    qemu_tcg_destroy_vcpu(cpu);
 -    cpu_thread_signal_destroyed(cpu);
 -    qemu_mutex_unlock_iothread();
 -    rcu_unregister_thread();
 -    return NULL;
 -}
 -
 -static void tcg_start_vcpu_thread(CPUState *cpu)
 +void tcg_start_vcpu_thread(CPUState *cpu)
  {
-     RES_TYPE ret;
+     char thread_name[VCPU_THREAD_NAME_SIZE];
--    helper_retaddr = retaddr;
+     static QemuCond *single_tcg_halt_cond;
-+    set_helper_retaddr(retaddr);
+@@ -XXX,XX +XXX,XX @@ static void tcg_start_vcpu_thread(CPUState *cpu)
-     ret = glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(env, ptr);
+     }
 -    helper_retaddr = 0;
 +    clear_helper_retaddr();
      return ret;
  }
-@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
+-static int64_t tcg_get_virtual_clock(void)
-                                                   uintptr_t retaddr)
++void qemu_tcg_destroy_vcpu(CPUState *cpu)
  {
-     int ret;
+-    if (icount_enabled()) {
--    helper_retaddr = retaddr;
+-        return icount_get();
-+    set_helper_retaddr(retaddr);
+-    }
-     ret = glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(env, ptr);
+-    return cpu_get_clock();
--    helper_retaddr = 0;
++    cpu_thread_signal_destroyed(cpu);
 +    clear_helper_retaddr();
      return ret;
  }
- #endif
-@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
+-static int64_t tcg_get_elapsed_ticks(void)
-                                                   RES_TYPE v,
++int tcg_cpu_exec(CPUState *cpu)
                                                    uintptr_t retaddr)
  {
--    helper_retaddr = retaddr;
+-    if (icount_enabled()) {
-+    set_helper_retaddr(retaddr);
+-        return icount_get();
-     glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(env, ptr, v);
+-    }
--    helper_retaddr = 0;
+-    return cpu_get_ticks();
-+    clear_helper_retaddr();
++    int ret;
- }
++#ifdef CONFIG_PROFILER
- #endif
++    int64_t ti;
++#endif
-diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
++    assert(tcg_enabled());
-index XXXXXXX..XXXXXXX 100644
++#ifdef CONFIG_PROFILER
---- a/accel/tcg/user-exec.c
++    ti = profile_getclock();
-+++ b/accel/tcg/user-exec.c
++#endif
-@@ -XXX,XX +XXX,XX @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
++    cpu_exec_start(cpu);
-              * currently executing TB was modified and must be exited
++    ret = cpu_exec(cpu);
-              * immediately.  Clear helper_retaddr for next execution.
++    cpu_exec_end(cpu);
-              */
++#ifdef CONFIG_PROFILER
--            helper_retaddr = 0;
++    qatomic_set(&tcg_ctx->prof.cpu_exec_time,
-+            clear_helper_retaddr();
++                tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
-             cpu_exit_tb_from_sighandler(cpu, old_set);
++#endif
              /* NORETURN */
@@ -XXX,XX +XXX,XX @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
       * an exception.  Undo signal and retaddr state prior to longjmp.
       */
      sigprocmask(SIG_SETMASK, old_set, NULL);
 -    helper_retaddr = 0;
 +    clear_helper_retaddr();
      cc = CPU_GET_CLASS(cpu);
      access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
      if (unlikely(addr & (size - 1))) {
          cpu_loop_exit_atomic(env_cpu(env), retaddr);
      }
 -    helper_retaddr = retaddr;
 -    return g2h(addr);
 +    void *ret = g2h(addr);
 +    set_helper_retaddr(retaddr);
 +    return ret;
  }
- /* Macro to call the above, with local variables from the use context.  */
+ /* mask must never be zero, except for A20 change call */
- #define ATOMIC_MMU_DECLS do {} while (0)
+-static void tcg_handle_interrupt(CPUState *cpu, int mask)
- #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC())
++void tcg_handle_interrupt(CPUState *cpu, int mask)
--#define ATOMIC_MMU_CLEANUP do { helper_retaddr = 0; } while (0)
+ {
-+#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
+-    int old_mask;
+     g_assert(qemu_mutex_iothread_locked());
- #define ATOMIC_NAME(X)   HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
- #define EXTRA_ARGS
+-    old_mask = cpu->interrupt_request;
-diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
+     cpu->interrupt_request |= mask;
      /*
@@ -XXX,XX +XXX,XX @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
          qemu_cpu_kick(cpu);
      } else {
          qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
 -        if (icount_enabled() &&
 -            !cpu->can_do_io
 -            && (mask & ~old_mask) != 0) {
 -            cpu_abort(cpu, "Raised interrupt while not in I/O function");
 -        }
      }
  }
 -
 -const CpusAccel tcg_cpus = {
 -    .create_vcpu_thread = tcg_start_vcpu_thread,
 -    .kick_vcpu_thread = tcg_kick_vcpu_thread,
 -
 -    .handle_interrupt = tcg_handle_interrupt,
 -
 -    .get_virtual_clock = tcg_get_virtual_clock,
 -    .get_elapsed_ticks = tcg_get_elapsed_ticks,
 -};
 diff --git a/softmmu/icount.c b/softmmu/icount.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/helper-a64.c
+--- a/softmmu/icount.c
-+++ b/target/arm/helper-a64.c
++++ b/softmmu/icount.c
-@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
+@@ -XXX,XX +XXX,XX @@ void icount_start_warp_timer(void)
-     /* ??? Enforce alignment.  */
-     uint64_t *haddr = g2h(addr);
+ void icount_account_warp_timer(void)
+ {
--    helper_retaddr = ra;
+-    if (!icount_enabled() || !icount_sleep) {
-+    set_helper_retaddr(ra);
++    if (!icount_sleep) {
-     o0 = ldq_le_p(haddr + 0);
+         return;
      o1 = ldq_le_p(haddr + 1);
      oldv = int128_make128(o0, o1);
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
          stq_le_p(haddr + 0, int128_getlo(newv));
          stq_le_p(haddr + 1, int128_gethi(newv));
      }
--    helper_retaddr = 0;
-+    clear_helper_retaddr();
+diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
  #else
      int mem_idx = cpu_mmu_index(env, false);
      TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
      /* ??? Enforce alignment.  */
      uint64_t *haddr = g2h(addr);
 -    helper_retaddr = ra;
 +    set_helper_retaddr(ra);
      o1 = ldq_be_p(haddr + 0);
      o0 = ldq_be_p(haddr + 1);
      oldv = int128_make128(o0, o1);
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
          stq_be_p(haddr + 0, int128_gethi(newv));
          stq_be_p(haddr + 1, int128_getlo(newv));
      }
 -    helper_retaddr = 0;
 +    clear_helper_retaddr();
  #else
      int mem_idx = cpu_mmu_index(env, false);
      TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
 diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/arm/sve_helper.c
+--- a/accel/tcg/meson.build
-+++ b/target/arm/sve_helper.c
++++ b/accel/tcg/meson.build
-@@ -XXX,XX +XXX,XX @@ static intptr_t max_for_page(target_ulong base, intptr_t mem_off,
+@@ -XXX,XX +XXX,XX @@ tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
-     return MIN(split, mem_max - mem_off) + mem_off;
+ tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl])
- }
+ specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
--static inline void set_helper_retaddr(uintptr_t ra)
+-specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files('tcg-all.c', 'cputlb.c', 'tcg-cpus.c'))
--{
++specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
--#ifdef CONFIG_USER_ONLY
++  'tcg-all.c',
--    helper_retaddr = ra;
++  'cputlb.c',
-+#ifndef CONFIG_USER_ONLY
++  'tcg-cpus.c',
-+/* These are normally defined only for CONFIG_USER_ONLY in <exec/cpu_ldst.h> */
++  'tcg-cpus-mttcg.c',
-+static inline void set_helper_retaddr(uintptr_t ra) { }
++  'tcg-cpus-icount.c',
-+static inline void clear_helper_retaddr(void) { }
++  'tcg-cpus-rr.c'
- #endif
++))
 -}
  /*
   * The result of tlb_vaddr_to_host for user-only is just g2h(x),
@@ -XXX,XX +XXX,XX @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
          if (test_host_page(host)) {
              mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
              tcg_debug_assert(mem_off == mem_max);
 -            set_helper_retaddr(0);
 +            clear_helper_retaddr();
              /* After having taken any fault, zero leading inactive elements. */
              swap_memzero(vd, reg_off);
              return;
@@ -XXX,XX +XXX,XX @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
      }
  #endif
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
      memcpy(vd, &scratch, reg_max);
  }
@@ -XXX,XX +XXX,XX @@ static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr,
              addr += 2 * size;
          } while (i & 15);
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
      /* Wait until all exceptions have been raised to write back.  */
      memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
@@ -XXX,XX +XXX,XX @@ static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr,
              addr += 3 * size;
          } while (i & 15);
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
      /* Wait until all exceptions have been raised to write back.  */
      memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
@@ -XXX,XX +XXX,XX @@ static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr,
              addr += 4 * size;
          } while (i & 15);
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
      /* Wait until all exceptions have been raised to write back.  */
      memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
@@ -XXX,XX +XXX,XX @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
          if (test_host_page(host)) {
              mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
              tcg_debug_assert(mem_off == mem_max);
 -            set_helper_retaddr(0);
 +            clear_helper_retaddr();
              /* After any fault, zero any leading inactive elements.  */
              swap_memzero(vd, reg_off);
              return;
@@ -XXX,XX +XXX,XX @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
      }
  #endif
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
      record_fault(env, reg_off, reg_max);
  }
@@ -XXX,XX +XXX,XX @@ static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
              addr += msize;
          } while (i & 15);
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
  }
  static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
@@ -XXX,XX +XXX,XX @@ static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
              addr += 2 * msize;
          } while (i & 15);
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
  }
  static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
@@ -XXX,XX +XXX,XX @@ static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
              addr += 3 * msize;
          } while (i & 15);
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
  }
  static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
@@ -XXX,XX +XXX,XX @@ static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
              addr += 4 * msize;
          } while (i & 15);
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
  }
  #define DO_STN_1(N, NAME, ESIZE) \
@@ -XXX,XX +XXX,XX @@ static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
              i += 4, pg >>= 4;
          } while (i & 15);
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
      /* Wait until all exceptions have been raised to write back.  */
      memcpy(vd, &scratch, oprsz);
@@ -XXX,XX +XXX,XX @@ static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
              tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra);
          }
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
      /* Wait until all exceptions have been raised to write back.  */
      memcpy(vd, &scratch, oprsz * 8);
@@ -XXX,XX +XXX,XX @@ static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
          tlb_fn(env, vd, reg_off, addr, oi, ra);
          /* The rest of the reads will be non-faulting.  */
 -        set_helper_retaddr(0);
 +        clear_helper_retaddr();
      }
      /* After any fault, zero the leading predicated false elements.  */
@@ -XXX,XX +XXX,XX @@ static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
          tlb_fn(env, vd, reg_off, addr, oi, ra);
          /* The rest of the reads will be non-faulting.  */
 -        set_helper_retaddr(0);
 +        clear_helper_retaddr();
      }
      /* After any fault, zero the leading predicated false elements.  */
@@ -XXX,XX +XXX,XX @@ static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
              i += 4, pg >>= 4;
          } while (i & 15);
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
  }
  static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
@@ -XXX,XX +XXX,XX @@ static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
              tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra);
          }
      }
 -    set_helper_retaddr(0);
 +    clear_helper_retaddr();
  }
  #define DO_ST1_ZPZ_S(MEM, OFS) \
 --
-.17.1
+.25.1

-[Qemu-devel] [PULL for-4.1 5/7] tcg: Remove cpu_ld*_code_ra
+[PULL 2/3] accel/tcg: split tcg_start_vcpu_thread
-These functions are not used, and are not usable in the
+From: Claudio Fontana <cfontana@suse.de>
-context of code generation, because we never have a helper
-return address to pass in to them.
+after the initial split into 3 tcg variants, we proceed to also
+split tcg_start_vcpu_thread.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 We actually split it in 2 this time, since the icount variant
 just uses the round robin function.
 Suggested-by: Richard Henderson <richard.henderson@linaro.org>
 Signed-off-by: Claudio Fontana <cfontana@suse.de>
 Message-Id: <20201015143217.29337-3-cfontana@suse.de>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/exec/cpu_ldst_useronly_template.h | 6 +++++-
+ accel/tcg/tcg-cpus-mttcg.h  | 21 --------------
-file changed, 5 insertions(+), 1 deletion(-)
+ accel/tcg/tcg-cpus-rr.h     |  3 +-
+ accel/tcg/tcg-cpus.h        |  1 -
-diff --git a/include/exec/cpu_ldst_useronly_template.h b/include/exec/cpu_ldst_useronly_template.h
+ accel/tcg/tcg-all.c         |  5 ++++
-index XXXXXXX..XXXXXXX 100644
+ accel/tcg/tcg-cpus-icount.c |  2 +-
---- a/include/exec/cpu_ldst_useronly_template.h
+ accel/tcg/tcg-cpus-mttcg.c  | 29 +++++++++++++++++--
-+++ b/include/exec/cpu_ldst_useronly_template.h
+ accel/tcg/tcg-cpus-rr.c     | 39 +++++++++++++++++++++++--
-@@ -XXX,XX +XXX,XX @@ glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr)
+ accel/tcg/tcg-cpus.c        | 58 -------------------------------------
-     return glue(glue(ld, USUFFIX), _p)(g2h(ptr));
+files changed, 71 insertions(+), 87 deletions(-)
  delete mode 100644 accel/tcg/tcg-cpus-mttcg.h
 diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
 deleted file mode 100644
 index XXXXXXX..XXXXXXX
 --- a/accel/tcg/tcg-cpus-mttcg.h
 +++ /dev/null
@@ -XXX,XX +XXX,XX @@
 -/*
 - * QEMU TCG Multi Threaded vCPUs implementation
 - *
 - * Copyright 2020 SUSE LLC
 - *
 - * This work is licensed under the terms of the GNU GPL, version 2 or later.
 - * See the COPYING file in the top-level directory.
 - */
 -
 -#ifndef TCG_CPUS_MTTCG_H
 -#define TCG_CPUS_MTTCG_H
 -
 -/*
 - * In the multi-threaded case each vCPU has its own thread. The TLS
 - * variable current_cpu can be used deep in the code to find the
 - * current CPUState for a given thread.
 - */
 -
 -void *tcg_cpu_thread_fn(void *arg);
 -
 -#endif /* TCG_CPUS_MTTCG_H */
 diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-rr.h
 +++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
  /* Kick all RR vCPUs. */
  void qemu_cpu_kick_rr_cpus(CPUState *unused);
 -void *tcg_rr_cpu_thread_fn(void *arg);
 +/* start the round robin vcpu thread */
 +void rr_start_vcpu_thread(CPUState *cpu);
  #endif /* TCG_CPUS_RR_H */
 diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus.h
 +++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
  extern const CpusAccel tcg_cpus_icount;
  extern const CpusAccel tcg_cpus_rr;
 -void tcg_start_vcpu_thread(CPUState *cpu);
  void qemu_tcg_destroy_vcpu(CPUState *cpu);
  int tcg_cpu_exec(CPUState *cpu);
  void tcg_handle_interrupt(CPUState *cpu, int mask);
 diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-all.c
 +++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
      tcg_exec_init(s->tb_size * 1024 * 1024);
      mttcg_enabled = s->mttcg_enabled;
 +    /*
 +     * Initialize TCG regions
 +     */
 +    tcg_region_init();
 +
      if (mttcg_enabled) {
          cpus_register_accel(&tcg_cpus_mttcg);
      } else if (icount_enabled()) {
 diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-icount.c
 +++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
  }
-+#ifndef CODE_ACCESS
+ const CpusAccel tcg_cpus_icount = {
- static inline RES_TYPE
+-    .create_vcpu_thread = tcg_start_vcpu_thread,
- glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
++    .create_vcpu_thread = rr_start_vcpu_thread,
-                                                   abi_ptr ptr,
+     .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
-@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
-     clear_helper_retaddr();
+     .handle_interrupt = icount_handle_interrupt,
-     return ret;
+diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-mttcg.c
 +++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
  #include "hw/boards.h"
  #include "tcg-cpus.h"
 -#include "tcg-cpus-mttcg.h"
  /*
   * In the multi-threaded case each vCPU has its own thread. The TLS
@@ -XXX,XX +XXX,XX @@
   * current CPUState for a given thread.
   */
 -void *tcg_cpu_thread_fn(void *arg)
 +static void *tcg_cpu_thread_fn(void *arg)
  {
      CPUState *cpu = arg;
@@ -XXX,XX +XXX,XX @@ static void mttcg_kick_vcpu_thread(CPUState *cpu)
      cpu_exit(cpu);
  }
++static void mttcg_start_vcpu_thread(CPUState *cpu)
++{
++    char thread_name[VCPU_THREAD_NAME_SIZE];
++
++    g_assert(tcg_enabled());
++
++    parallel_cpus = (current_machine->smp.max_cpus > 1);
++
++    cpu->thread = g_malloc0(sizeof(QemuThread));
++    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
++    qemu_cond_init(cpu->halt_cond);
++
++    /* create a thread per vCPU with TCG (MTTCG) */
++    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
++             cpu->cpu_index);
++
++    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
++                       cpu, QEMU_THREAD_JOINABLE);
++
++#ifdef _WIN32
++    cpu->hThread = qemu_thread_get_handle(cpu->thread);
 +#endif
++}
- #if DATA_SIZE <= 2
++
- static inline int
+ const CpusAccel tcg_cpus_mttcg = {
-@@ -XXX,XX +XXX,XX @@ glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr)
+-    .create_vcpu_thread = tcg_start_vcpu_thread,
-     return glue(glue(lds, SUFFIX), _p)(g2h(ptr));
++    .create_vcpu_thread = mttcg_start_vcpu_thread,
      .kick_vcpu_thread = mttcg_kick_vcpu_thread,
      .handle_interrupt = tcg_handle_interrupt,
 diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-rr.c
 +++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
   * elsewhere.
   */
 -void *tcg_rr_cpu_thread_fn(void *arg)
 +static void *tcg_rr_cpu_thread_fn(void *arg)
  {
      CPUState *cpu = arg;
@@ -XXX,XX +XXX,XX @@ void *tcg_rr_cpu_thread_fn(void *arg)
      return NULL;
  }
-+#ifndef CODE_ACCESS
++void rr_start_vcpu_thread(CPUState *cpu)
- static inline int
++{
- glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
++    char thread_name[VCPU_THREAD_NAME_SIZE];
-                                                   abi_ptr ptr,
++    static QemuCond *single_tcg_halt_cond;
-@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
++    static QemuThread *single_tcg_cpu_thread;
-     clear_helper_retaddr();
++
-     return ret;
++    g_assert(tcg_enabled());
- }
++    parallel_cpus = false;
 +
 +    if (!single_tcg_cpu_thread) {
 +        cpu->thread = g_malloc0(sizeof(QemuThread));
 +        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
 +        qemu_cond_init(cpu->halt_cond);
 +
 +        /* share a single thread for all cpus with TCG */
 +        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
 +        qemu_thread_create(cpu->thread, thread_name,
 +                           tcg_rr_cpu_thread_fn,
 +                           cpu, QEMU_THREAD_JOINABLE);
 +
 +        single_tcg_halt_cond = cpu->halt_cond;
 +        single_tcg_cpu_thread = cpu->thread;
 +#ifdef _WIN32
 +        cpu->hThread = qemu_thread_get_handle(cpu->thread);
 +#endif
 +    } else {
 +        /* we share the thread */
 +        cpu->thread = single_tcg_cpu_thread;
 +        cpu->halt_cond = single_tcg_halt_cond;
 +        cpu->thread_id = first_cpu->thread_id;
 +        cpu->can_do_io = 1;
 +        cpu->created = true;
 +    }
 +}
 +
  const CpusAccel tcg_cpus_rr = {
 -    .create_vcpu_thread = tcg_start_vcpu_thread,
 +    .create_vcpu_thread = rr_start_vcpu_thread,
      .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
      .handle_interrupt = tcg_handle_interrupt,
 diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus.c
 +++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
  #include "hw/boards.h"
  #include "tcg-cpus.h"
 -#include "tcg-cpus-mttcg.h"
 -#include "tcg-cpus-rr.h"
  /* common functionality among all TCG variants */
 -void tcg_start_vcpu_thread(CPUState *cpu)
 -{
 -    char thread_name[VCPU_THREAD_NAME_SIZE];
 -    static QemuCond *single_tcg_halt_cond;
 -    static QemuThread *single_tcg_cpu_thread;
 -    static int tcg_region_inited;
 -
 -    assert(tcg_enabled());
 -    /*
 -     * Initialize TCG regions--once. Now is a good time, because:
 -     * (1) TCG's init context, prologue and target globals have been set up.
 -     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
 -     *     -accel flag is processed, so the check doesn't work then).
 -     */
 -    if (!tcg_region_inited) {
 -        tcg_region_inited = 1;
 -        tcg_region_init();
 -        parallel_cpus = qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1;
 -    }
 -
 -    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
 -        cpu->thread = g_malloc0(sizeof(QemuThread));
 -        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
 -        qemu_cond_init(cpu->halt_cond);
 -
 -        if (qemu_tcg_mttcg_enabled()) {
 -            /* create a thread per vCPU with TCG (MTTCG) */
 -            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
 -                 cpu->cpu_index);
 -
 -            qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
 -                               cpu, QEMU_THREAD_JOINABLE);
 -
 -        } else {
 -            /* share a single thread for all cpus with TCG */
 -            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
 -            qemu_thread_create(cpu->thread, thread_name,
 -                               tcg_rr_cpu_thread_fn,
 -                               cpu, QEMU_THREAD_JOINABLE);
 -
 -            single_tcg_halt_cond = cpu->halt_cond;
 -            single_tcg_cpu_thread = cpu->thread;
 -        }
 -#ifdef _WIN32
 -        cpu->hThread = qemu_thread_get_handle(cpu->thread);
 -#endif
-+#endif /* CODE_ACCESS */
+-    } else {
-+#endif /* DATA_SIZE <= 2 */
+-        /* For non-MTTCG cases we share the thread */
+-        cpu->thread = single_tcg_cpu_thread;
- #ifndef CODE_ACCESS
+-        cpu->halt_cond = single_tcg_halt_cond;
- static inline void
+-        cpu->thread_id = first_cpu->thread_id;
 -        cpu->can_do_io = 1;
 -        cpu->created = true;
 -    }
 -}
 -
  void qemu_tcg_destroy_vcpu(CPUState *cpu)
  {
      cpu_thread_signal_destroyed(cpu);
 --
-2.17.1
+2.25.1

-[Qemu-devel] [PULL for-4.1 6/7] tcg: Remove duplicate #if !defined(CODE_ACCESS)
+[PULL 3/3] accel/tcg: rename tcg-cpus functions to match module name
-This code block is already surrounded by #ifndef CODE_ACCESS.
+From: Claudio Fontana <cfontana@suse.de>
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Signed-off-by: Claudio Fontana <cfontana@suse.de>
 Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 Message-Id: <20201015143217.29337-4-cfontana@suse.de>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/exec/cpu_ldst_useronly_template.h | 2 --
+ accel/tcg/tcg-cpus-icount.h |  6 +--
- 1 file changed, 2 deletions(-)
+ accel/tcg/tcg-cpus-rr.h     |  2 +-
  accel/tcg/tcg-cpus.h        |  6 +--
  accel/tcg/tcg-cpus-icount.c | 24 ++++++------
  accel/tcg/tcg-cpus-mttcg.c  | 10 ++---
  accel/tcg/tcg-cpus-rr.c     | 74 ++++++++++++++++++-------------------
  accel/tcg/tcg-cpus.c        |  6 +--
  7 files changed, 64 insertions(+), 64 deletions(-)
-diff --git a/include/exec/cpu_ldst_useronly_template.h b/include/exec/cpu_ldst_useronly_template.h
+diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
 index XXXXXXX..XXXXXXX 100644
---- a/include/exec/cpu_ldst_useronly_template.h
+--- a/accel/tcg/tcg-cpus-icount.h
-+++ b/include/exec/cpu_ldst_useronly_template.h
++++ b/accel/tcg/tcg-cpus-icount.h
-@@ -XXX,XX +XXX,XX @@ static inline void
+@@ -XXX,XX +XXX,XX @@
- glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr,
+ #ifndef TCG_CPUS_ICOUNT_H
-                                       RES_TYPE v)
+ #define TCG_CPUS_ICOUNT_H
- {
--#if !defined(CODE_ACCESS)
+-void handle_icount_deadline(void);
-     trace_guest_mem_before_exec(
+-void prepare_icount_for_run(CPUState *cpu);
-         env_cpu(env), ptr,
+-void process_icount_data(CPUState *cpu);
-         trace_mem_build_info(SHIFT, false, MO_TE, true));
++void icount_handle_deadline(void);
--#endif
++void icount_prepare_for_run(CPUState *cpu);
-     glue(glue(st, SUFFIX), _p)(g2h(ptr), v);
++void icount_process_data(CPUState *cpu);
- }
  #endif /* TCG_CPUS_ICOUNT_H */
 diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-rr.h
 +++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
  #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
  /* Kick all RR vCPUs. */
 -void qemu_cpu_kick_rr_cpus(CPUState *unused);
 +void rr_kick_vcpu_thread(CPUState *unused);
  /* start the round robin vcpu thread */
  void rr_start_vcpu_thread(CPUState *cpu);
 diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus.h
 +++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
  extern const CpusAccel tcg_cpus_icount;
  extern const CpusAccel tcg_cpus_rr;
 -void qemu_tcg_destroy_vcpu(CPUState *cpu);
 -int tcg_cpu_exec(CPUState *cpu);
 -void tcg_handle_interrupt(CPUState *cpu, int mask);
 +void tcg_cpus_destroy(CPUState *cpu);
 +int tcg_cpus_exec(CPUState *cpu);
 +void tcg_cpus_handle_interrupt(CPUState *cpu, int mask);
  #endif /* TCG_CPUS_H */
 diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-icount.c
 +++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
  #include "tcg-cpus-icount.h"
  #include "tcg-cpus-rr.h"
 -static int64_t tcg_get_icount_limit(void)
 +static int64_t icount_get_limit(void)
  {
      int64_t deadline;
@@ -XXX,XX +XXX,XX @@ static int64_t tcg_get_icount_limit(void)
      }
  }
 -static void notify_aio_contexts(void)
 +static void icount_notify_aio_contexts(void)
  {
      /* Wake up other AioContexts.  */
      qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
      qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
  }
 -void handle_icount_deadline(void)
 +void icount_handle_deadline(void)
  {
      assert(qemu_in_vcpu_thread());
      int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                    QEMU_TIMER_ATTR_ALL);
      if (deadline == 0) {
 -        notify_aio_contexts();
 +        icount_notify_aio_contexts();
      }
  }
 -void prepare_icount_for_run(CPUState *cpu)
 +void icount_prepare_for_run(CPUState *cpu)
  {
      int insns_left;
      /*
 -     * These should always be cleared by process_icount_data after
 +     * These should always be cleared by icount_process_data after
       * each vCPU execution. However u16.high can be raised
 -     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
 +     * asynchronously by cpu_exit/cpu_interrupt/tcg_cpus_handle_interrupt
       */
      g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
      g_assert(cpu->icount_extra == 0);
 -    cpu->icount_budget = tcg_get_icount_limit();
 +    cpu->icount_budget = icount_get_limit();
      insns_left = MIN(0xffff, cpu->icount_budget);
      cpu_neg(cpu)->icount_decr.u16.low = insns_left;
      cpu->icount_extra = cpu->icount_budget - insns_left;
@@ -XXX,XX +XXX,XX @@ void prepare_icount_for_run(CPUState *cpu)
      replay_mutex_lock();
      if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
 -        notify_aio_contexts();
 +        icount_notify_aio_contexts();
      }
  }
 -void process_icount_data(CPUState *cpu)
 +void icount_process_data(CPUState *cpu)
  {
      /* Account for executed instructions */
      icount_update(cpu);
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
  {
      int old_mask = cpu->interrupt_request;
 -    tcg_handle_interrupt(cpu, mask);
 +    tcg_cpus_handle_interrupt(cpu, mask);
      if (qemu_cpu_is_self(cpu) &&
          !cpu->can_do_io
          && (mask & ~old_mask) != 0) {
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
  const CpusAccel tcg_cpus_icount = {
      .create_vcpu_thread = rr_start_vcpu_thread,
 -    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 +    .kick_vcpu_thread = rr_kick_vcpu_thread,
      .handle_interrupt = icount_handle_interrupt,
      .get_virtual_clock = icount_get,
 diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-mttcg.c
 +++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
   * current CPUState for a given thread.
   */
 -static void *tcg_cpu_thread_fn(void *arg)
 +static void *mttcg_cpu_thread_fn(void *arg)
  {
      CPUState *cpu = arg;
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
          if (cpu_can_run(cpu)) {
              int r;
              qemu_mutex_unlock_iothread();
 -            r = tcg_cpu_exec(cpu);
 +            r = tcg_cpus_exec(cpu);
              qemu_mutex_lock_iothread();
              switch (r) {
              case EXCP_DEBUG:
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
          qemu_wait_io_event(cpu);
      } while (!cpu->unplug || cpu_can_run(cpu));
 -    qemu_tcg_destroy_vcpu(cpu);
 +    tcg_cpus_destroy(cpu);
      qemu_mutex_unlock_iothread();
      rcu_unregister_thread();
      return NULL;
@@ -XXX,XX +XXX,XX @@ static void mttcg_start_vcpu_thread(CPUState *cpu)
      snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
               cpu->cpu_index);
 -    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
 +    qemu_thread_create(cpu->thread, thread_name, mttcg_cpu_thread_fn,
                         cpu, QEMU_THREAD_JOINABLE);
  #ifdef _WIN32
@@ -XXX,XX +XXX,XX @@ const CpusAccel tcg_cpus_mttcg = {
      .create_vcpu_thread = mttcg_start_vcpu_thread,
      .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 -    .handle_interrupt = tcg_handle_interrupt,
 +    .handle_interrupt = tcg_cpus_handle_interrupt,
  };
 diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-rr.c
 +++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
  #include "tcg-cpus-icount.h"
  /* Kick all RR vCPUs */
 -void qemu_cpu_kick_rr_cpus(CPUState *unused)
 +void rr_kick_vcpu_thread(CPUState *unused)
  {
      CPUState *cpu;
@@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick_rr_cpus(CPUState *unused)
   * idleness is complete.
   */
 -static QEMUTimer *tcg_kick_vcpu_timer;
 -static CPUState *tcg_current_rr_cpu;
 +static QEMUTimer *rr_kick_vcpu_timer;
 +static CPUState *rr_current_cpu;
  #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 -static inline int64_t qemu_tcg_next_kick(void)
 +static inline int64_t rr_next_kick_time(void)
  {
      return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
  }
  /* Kick the currently round-robin scheduled vCPU to next */
 -static void qemu_cpu_kick_rr_next_cpu(void)
 +static void rr_kick_next_cpu(void)
  {
      CPUState *cpu;
      do {
 -        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
 +        cpu = qatomic_mb_read(&rr_current_cpu);
          if (cpu) {
              cpu_exit(cpu);
          }
 -    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
 +    } while (cpu != qatomic_mb_read(&rr_current_cpu));
  }
 -static void kick_tcg_thread(void *opaque)
 +static void rr_kick_thread(void *opaque)
  {
 -    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 -    qemu_cpu_kick_rr_next_cpu();
 +    timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
 +    rr_kick_next_cpu();
  }
 -static void start_tcg_kick_timer(void)
 +static void rr_start_kick_timer(void)
  {
 -    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 -        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 -                                           kick_tcg_thread, NULL);
 +    if (!rr_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 +        rr_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 +                                           rr_kick_thread, NULL);
      }
 -    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
 -        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 +    if (rr_kick_vcpu_timer && !timer_pending(rr_kick_vcpu_timer)) {
 +        timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
      }
  }
 -static void stop_tcg_kick_timer(void)
 +static void rr_stop_kick_timer(void)
  {
 -    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
 -        timer_del(tcg_kick_vcpu_timer);
 +    if (rr_kick_vcpu_timer && timer_pending(rr_kick_vcpu_timer)) {
 +        timer_del(rr_kick_vcpu_timer);
      }
  }
 -static void qemu_tcg_rr_wait_io_event(void)
 +static void rr_wait_io_event(void)
  {
      CPUState *cpu;
      while (all_cpu_threads_idle()) {
 -        stop_tcg_kick_timer();
 +        rr_stop_kick_timer();
          qemu_cond_wait_iothread(first_cpu->halt_cond);
      }
 -    start_tcg_kick_timer();
 +    rr_start_kick_timer();
      CPU_FOREACH(cpu) {
          qemu_wait_io_event_common(cpu);
@@ -XXX,XX +XXX,XX @@ static void qemu_tcg_rr_wait_io_event(void)
   * Destroy any remaining vCPUs which have been unplugged and have
   * finished running
   */
 -static void deal_with_unplugged_cpus(void)
 +static void rr_deal_with_unplugged_cpus(void)
  {
      CPUState *cpu;
      CPU_FOREACH(cpu) {
          if (cpu->unplug && !cpu_can_run(cpu)) {
 -            qemu_tcg_destroy_vcpu(cpu);
 +            tcg_cpus_destroy(cpu);
              break;
          }
      }
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
   * elsewhere.
   */
 -static void *tcg_rr_cpu_thread_fn(void *arg)
 +static void *rr_cpu_thread_fn(void *arg)
  {
      CPUState *cpu = arg;
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
          }
      }
 -    start_tcg_kick_timer();
 +    rr_start_kick_timer();
      cpu = first_cpu;
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
               * Run the timers here.  This is much more efficient than
               * waking up the I/O thread and waiting for completion.
               */
 -            handle_icount_deadline();
 +            icount_handle_deadline();
          }
          replay_mutex_unlock();
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
          while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
 -            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
 +            qatomic_mb_set(&rr_current_cpu, cpu);
              current_cpu = cpu;
              qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
                  qemu_mutex_unlock_iothread();
                  if (icount_enabled()) {
 -                    prepare_icount_for_run(cpu);
 +                    icount_prepare_for_run(cpu);
                  }
 -                r = tcg_cpu_exec(cpu);
 +                r = tcg_cpus_exec(cpu);
                  if (icount_enabled()) {
 -                    process_icount_data(cpu);
 +                    icount_process_data(cpu);
                  }
                  qemu_mutex_lock_iothread();
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
          } /* while (cpu && !cpu->exit_request).. */
          /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
 -        qatomic_set(&tcg_current_rr_cpu, NULL);
 +        qatomic_set(&rr_current_cpu, NULL);
          if (cpu && cpu->exit_request) {
              qatomic_mb_set(&cpu->exit_request, 0);
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
              qemu_notify_event();
          }
 -        qemu_tcg_rr_wait_io_event();
 -        deal_with_unplugged_cpus();
 +        rr_wait_io_event();
 +        rr_deal_with_unplugged_cpus();
      }
      rcu_unregister_thread();
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
          /* share a single thread for all cpus with TCG */
          snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
          qemu_thread_create(cpu->thread, thread_name,
 -                           tcg_rr_cpu_thread_fn,
 +                           rr_cpu_thread_fn,
                             cpu, QEMU_THREAD_JOINABLE);
          single_tcg_halt_cond = cpu->halt_cond;
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
  const CpusAccel tcg_cpus_rr = {
      .create_vcpu_thread = rr_start_vcpu_thread,
 -    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 +    .kick_vcpu_thread = rr_kick_vcpu_thread,
 -    .handle_interrupt = tcg_handle_interrupt,
 +    .handle_interrupt = tcg_cpus_handle_interrupt,
  };
 diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus.c
 +++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
  /* common functionality among all TCG variants */
 -void qemu_tcg_destroy_vcpu(CPUState *cpu)
 +void tcg_cpus_destroy(CPUState *cpu)
  {
      cpu_thread_signal_destroyed(cpu);
  }
 -int tcg_cpu_exec(CPUState *cpu)
 +int tcg_cpus_exec(CPUState *cpu)
  {
      int ret;
  #ifdef CONFIG_PROFILER
@@ -XXX,XX +XXX,XX @@ int tcg_cpu_exec(CPUState *cpu)
  }
  /* mask must never be zero, except for A20 change call */
 -void tcg_handle_interrupt(CPUState *cpu, int mask)
 +void tcg_cpus_handle_interrupt(CPUState *cpu, int mask)
  {
      g_assert(qemu_mutex_iothread_locked());
 --
-2.17.1
+2.25.1

-[Qemu-devel] [PULL for-4.1 7/7] tcg: Release mmap_lock on translation fault
+Deleted patch
-Turn helper_retaddr into a multi-state flag that may now also
-indicate when we're performing a read on behalf of the translator.
-In this case, release the mmap_lock before the longjmp back to
-the main cpu loop, and thereby avoid a failing assert therein.
-Fixes: https://bugs.launchpad.net/qemu/+bug/1832353
-Tested-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- include/exec/cpu_ldst_useronly_template.h | 20 +++++--
- accel/tcg/user-exec.c                     | 66 ++++++++++++++++-------
- 2 files changed, 63 insertions(+), 23 deletions(-)
-diff --git a/include/exec/cpu_ldst_useronly_template.h b/include/exec/cpu_ldst_useronly_template.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/exec/cpu_ldst_useronly_template.h
-+++ b/include/exec/cpu_ldst_useronly_template.h
-@@ -XXX,XX +XXX,XX @@
- static inline RES_TYPE
- glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr)
- {
--#if !defined(CODE_ACCESS)
-+#ifdef CODE_ACCESS
-+    RES_TYPE ret;
-+    set_helper_retaddr(1);
-+    ret = glue(glue(ld, USUFFIX), _p)(g2h(ptr));
-+    clear_helper_retaddr();
-+    return ret;
-+#else
-     trace_guest_mem_before_exec(
-         env_cpu(env), ptr,
-         trace_mem_build_info(SHIFT, false, MO_TE, false));
--#endif
-     return glue(glue(ld, USUFFIX), _p)(g2h(ptr));
-+#endif
- }
- #ifndef CODE_ACCESS
-@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
- static inline int
- glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr)
- {
--#if !defined(CODE_ACCESS)
-+#ifdef CODE_ACCESS
-+    int ret;
-+    set_helper_retaddr(1);
-+    ret = glue(glue(lds, SUFFIX), _p)(g2h(ptr));
-+    clear_helper_retaddr();
-+    return ret;
-+#else
-     trace_guest_mem_before_exec(
-         env_cpu(env), ptr,
-         trace_mem_build_info(SHIFT, true, MO_TE, false));
--#endif
-     return glue(glue(lds, SUFFIX), _p)(g2h(ptr));
-+#endif
- }
- #ifndef CODE_ACCESS
-diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
-index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/user-exec.c
-+++ b/accel/tcg/user-exec.c
-@@ -XXX,XX +XXX,XX @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
-     CPUState *cpu = current_cpu;
-     CPUClass *cc;
-     unsigned long address = (unsigned long)info->si_addr;
--    MMUAccessType access_type;
-+    MMUAccessType access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
--    /* We must handle PC addresses from two different sources:
--     * a call return address and a signal frame address.
--     *
--     * Within cpu_restore_state_from_tb we assume the former and adjust
--     * the address by -GETPC_ADJ so that the address is within the call
--     * insn so that addr does not accidentally match the beginning of the
--     * next guest insn.
--     *
--     * However, when the PC comes from the signal frame, it points to
--     * the actual faulting host insn and not a call insn.  Subtracting
--     * GETPC_ADJ in that case may accidentally match the previous guest insn.
--     *
--     * So for the later case, adjust forward to compensate for what
--     * will be done later by cpu_restore_state_from_tb.
--     */
--    if (helper_retaddr) {
-+    switch (helper_retaddr) {
-+    default:
-+        /*
-+         * Fault during host memory operation within a helper function.
-+         * The helper's host return address, saved here, gives us a
-+         * pointer into the generated code that will unwind to the
-+         * correct guest pc.
-+         */
-         pc = helper_retaddr;
--    } else {
-+        break;
-+
-+    case 0:
-+        /*
-+         * Fault during host memory operation within generated code.
-+         * (Or, an unrelated bug within qemu, but we can't tell from here).
-+         *
-+         * We take the host pc from the signal frame.  However, we cannot
-+         * use that value directly.  Within cpu_restore_state_from_tb, we
-+         * assume PC comes from GETPC(), as used by the helper functions,
-+         * so we adjust the address by -GETPC_ADJ to form an address that
-+         * is within the call insn, so that the address does not accidentally
-+         * match the beginning of the next guest insn.  However, when the
-+         * pc comes from the signal frame it points to the actual faulting
-+         * host memory insn and not the return from a call insn.
-+         *
-+         * Therefore, adjust to compensate for what will be done later
-+         * by cpu_restore_state_from_tb.
-+         */
-         pc += GETPC_ADJ;
-+        break;
-+
-+    case 1:
-+        /*
-+         * Fault during host read for translation, or loosely, "execution".
-+         *
-+         * The guest pc is already pointing to the start of the TB for which
-+         * code is being generated.  If the guest translator manages the
-+         * page crossings correctly, this is exactly the correct address
-+         * (and if the translator doesn't handle page boundaries correctly
-+         * there's little we can do about that here).  Therefore, do not
-+         * trigger the unwinder.
-+         *
-+         * Like tb_gen_code, release the memory lock before cpu_loop_exit.
-+         */
-+        pc = 0;
-+        access_type = MMU_INST_FETCH;
-+        mmap_unlock();
-+        break;
-     }
-     /* For synchronous signals we expect to be coming from the vCPU
-@@ -XXX,XX +XXX,XX @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
-     clear_helper_retaddr();
-     cc = CPU_GET_CLASS(cpu);
--    access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
-     cc->tlb_fill(cpu, address, 0, access_type, MMU_USER_IDX, false, pc);
-     g_assert_not_reached();
- }
---
-2.17.1

The following changes since commit 1316b1ddc8a05e418c8134243f8bff8cccbbccb1:

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (2019-07-12 15:38:22 +0100)

are available in the Git repository at:

https://github.com/rth7680/qemu.git tags/pull-tcg-20190714

for you to fetch changes up to 52ba13f042714c4086416973fb88e2465e0888a1:

tcg: Release mmap_lock on translation fault (2019-07-14 12:19:01 +0200)

----------------------------------------------------------------
Fixes for 3 tcg bugs

----------------------------------------------------------------
Richard Henderson (7):
      tcg: Fix constant folding of INDEX_op_extract2_i32
      tcg/aarch64: Fix output of extract2 opcodes
      include/qemu/atomic.h: Add signal_barrier
      tcg: Introduce set/clear_helper_retaddr
      tcg: Remove cpu_ld*_code_ra
      tcg: Remove duplicate #if !defined(CODE_ACCESS)
      tcg: Release mmap_lock on translation fault

On a 64-bit host, discard any replications of the 32-bit
sign bit when performing the shift and merge.

Fixes: https://bugs.launchpad.net/bugs/1834496
Tested-by: Christophe Lyon <christophe.lyon@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
                 if (opc == INDEX_op_extract2_i64) {
                     tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
                 } else {
-                    tmp = (v1 >> op->args[3]) | (v2 << (32 - op->args[3]));
-                    tmp = (int32_t)tmp;
+                    tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) |
+                                    ((uint32_t)v2 << (32 - op->args[3])));
                 }
                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
                 break;
-- 
2.17.1

This patch fixes two problems:
(1) The inputs to the EXTR insn were reversed,
(2) The input constraints use rZ, which means that we need to use
    the REG0 macro in order to supply XZR for a constant 0 input.

Fixes: 464c2969d5d
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.inc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_extract2_i64:
     case INDEX_op_extract2_i32:
-        tcg_out_extr(s, ext, a0, a1, a2, args[3]);
+        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
         break;
 
     case INDEX_op_add2_i32:
-- 
2.17.1

We have some potential race conditions vs our user-exec signal
handler that will be solved with this barrier.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/atomic.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -XXX,XX +XXX,XX @@
 #define smp_read_barrier_depends()   barrier()
 #endif
 
+/*
+ * A signal barrier forces all pending local memory ops to be observed before
+ * a SIGSEGV is delivered to the *same* thread.  In practice this is exactly
+ * the same as barrier(), but since we have the correct builtin, use it.
+ */
+#define signal_barrier()    __atomic_signal_fence(__ATOMIC_SEQ_CST)
+
 /* Sanity check that the size of an atomic operation isn't "overly large".
  * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not
  * want to use them because we ought not need them, and this lets us do a
@@ -XXX,XX +XXX,XX @@
 #define smp_read_barrier_depends()   barrier()
 #endif
 
+#ifndef signal_barrier
+#define signal_barrier()    barrier()
+#endif
+
 /* These will only be atomic if the processor does the fetch or store
  * in a single issue memory operation
  */
-- 
2.17.1

At present we have a potential error in that helper_retaddr contains
data for handle_cpu_signal, but we have not ensured that those stores
will be scheduled properly before the operation that may fault.

It might be that these races are not in practice observable, due to
our use of -fno-strict-aliasing, but better safe than sorry.

Adjust all of the setters of helper_retaddr.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu_ldst.h                   | 20 +++++++++++
 include/exec/cpu_ldst_useronly_template.h | 12 +++----
 accel/tcg/user-exec.c                     | 11 +++---
 target/arm/helper-a64.c                   |  8 ++---
 target/arm/sve_helper.c                   | 43 +++++++++++------------
 5 files changed, 57 insertions(+), 37 deletions(-)

diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -XXX,XX +XXX,XX @@ typedef target_ulong abi_ptr;
 
 extern __thread uintptr_t helper_retaddr;
 
+static inline void set_helper_retaddr(uintptr_t ra)
+{
+    helper_retaddr = ra;
+    /*
+     * Ensure that this write is visible to the SIGSEGV handler that
+     * may be invoked due to a subsequent invalid memory operation.
+     */
+    signal_barrier();
+}
+
+static inline void clear_helper_retaddr(void)
+{
+    /*
+     * Ensure that previous memory operations have succeeded before
+     * removing the data visible to the signal handler.
+     */
+    signal_barrier();
+    helper_retaddr = 0;
+}
+
 /* In user-only mode we provide only the _code and _data accessors. */
 
 #define MEMSUFFIX _data
diff --git a/include/exec/cpu_ldst_useronly_template.h b/include/exec/cpu_ldst_useronly_template.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu_ldst_useronly_template.h
+++ b/include/exec/cpu_ldst_useronly_template.h
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
                                                   uintptr_t retaddr)
 {
     RES_TYPE ret;
-    helper_retaddr = retaddr;
+    set_helper_retaddr(retaddr);
     ret = glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(env, ptr);
-    helper_retaddr = 0;
+    clear_helper_retaddr();
     return ret;
 }
 
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
                                                   uintptr_t retaddr)
 {
     int ret;
-    helper_retaddr = retaddr;
+    set_helper_retaddr(retaddr);
     ret = glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(env, ptr);
-    helper_retaddr = 0;
+    clear_helper_retaddr();
     return ret;
 }
 #endif
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
                                                   RES_TYPE v,
                                                   uintptr_t retaddr)
 {
-    helper_retaddr = retaddr;
+    set_helper_retaddr(retaddr);
     glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(env, ptr, v);
-    helper_retaddr = 0;
+    clear_helper_retaddr();
 }
 #endif
 
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -XXX,XX +XXX,XX @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
              * currently executing TB was modified and must be exited
              * immediately.  Clear helper_retaddr for next execution.
              */
-            helper_retaddr = 0;
+            clear_helper_retaddr();
             cpu_exit_tb_from_sighandler(cpu, old_set);
             /* NORETURN */
 
@@ -XXX,XX +XXX,XX @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
      * an exception.  Undo signal and retaddr state prior to longjmp.
      */
     sigprocmask(SIG_SETMASK, old_set, NULL);
-    helper_retaddr = 0;
+    clear_helper_retaddr();
 
     cc = CPU_GET_CLASS(cpu);
     access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
     if (unlikely(addr & (size - 1))) {
         cpu_loop_exit_atomic(env_cpu(env), retaddr);
     }
-    helper_retaddr = retaddr;
-    return g2h(addr);
+    void *ret = g2h(addr);
+    set_helper_retaddr(retaddr);
+    return ret;
 }
 
 /* Macro to call the above, with local variables from the use context.  */
 #define ATOMIC_MMU_DECLS do {} while (0)
 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC())
-#define ATOMIC_MMU_CLEANUP do { helper_retaddr = 0; } while (0)
+#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
 
 #define ATOMIC_NAME(X)   HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
 #define EXTRA_ARGS
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
     /* ??? Enforce alignment.  */
     uint64_t *haddr = g2h(addr);
 
-    helper_retaddr = ra;
+    set_helper_retaddr(ra);
     o0 = ldq_le_p(haddr + 0);
     o1 = ldq_le_p(haddr + 1);
     oldv = int128_make128(o0, o1);
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
         stq_le_p(haddr + 0, int128_getlo(newv));
         stq_le_p(haddr + 1, int128_gethi(newv));
     }
-    helper_retaddr = 0;
+    clear_helper_retaddr();
 #else
     int mem_idx = cpu_mmu_index(env, false);
     TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
     /* ??? Enforce alignment.  */
     uint64_t *haddr = g2h(addr);
 
-    helper_retaddr = ra;
+    set_helper_retaddr(ra);
     o1 = ldq_be_p(haddr + 0);
     o0 = ldq_be_p(haddr + 1);
     oldv = int128_make128(o0, o1);
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
         stq_be_p(haddr + 0, int128_gethi(newv));
         stq_be_p(haddr + 1, int128_getlo(newv));
     }
-    helper_retaddr = 0;
+    clear_helper_retaddr();
 #else
     int mem_idx = cpu_mmu_index(env, false);
     TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -XXX,XX +XXX,XX @@ static intptr_t max_for_page(target_ulong base, intptr_t mem_off,
     return MIN(split, mem_max - mem_off) + mem_off;
 }
 
-static inline void set_helper_retaddr(uintptr_t ra)
-{
-#ifdef CONFIG_USER_ONLY
-    helper_retaddr = ra;
+#ifndef CONFIG_USER_ONLY
+/* These are normally defined only for CONFIG_USER_ONLY in <exec/cpu_ldst.h> */
+static inline void set_helper_retaddr(uintptr_t ra) { }
+static inline void clear_helper_retaddr(void) { }
 #endif
-}
 
 /*
  * The result of tlb_vaddr_to_host for user-only is just g2h(x),
@@ -XXX,XX +XXX,XX @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
         if (test_host_page(host)) {
             mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
             tcg_debug_assert(mem_off == mem_max);
-            set_helper_retaddr(0);
+            clear_helper_retaddr();
             /* After having taken any fault, zero leading inactive elements. */
             swap_memzero(vd, reg_off);
             return;
@@ -XXX,XX +XXX,XX @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
     }
 #endif
 
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
     memcpy(vd, &scratch, reg_max);
 }
 
@@ -XXX,XX +XXX,XX @@ static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr,
             addr += 2 * size;
         } while (i & 15);
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 
     /* Wait until all exceptions have been raised to write back.  */
     memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
@@ -XXX,XX +XXX,XX @@ static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr,
             addr += 3 * size;
         } while (i & 15);
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 
     /* Wait until all exceptions have been raised to write back.  */
     memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
@@ -XXX,XX +XXX,XX @@ static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr,
             addr += 4 * size;
         } while (i & 15);
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 
     /* Wait until all exceptions have been raised to write back.  */
     memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
@@ -XXX,XX +XXX,XX @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
         if (test_host_page(host)) {
             mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
             tcg_debug_assert(mem_off == mem_max);
-            set_helper_retaddr(0);
+            clear_helper_retaddr();
             /* After any fault, zero any leading inactive elements.  */
             swap_memzero(vd, reg_off);
             return;
@@ -XXX,XX +XXX,XX @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
     }
 #endif
 
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
     record_fault(env, reg_off, reg_max);
 }
 
@@ -XXX,XX +XXX,XX @@ static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
             addr += msize;
         } while (i & 15);
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 }
 
 static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
@@ -XXX,XX +XXX,XX @@ static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
             addr += 2 * msize;
         } while (i & 15);
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 }
 
 static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
@@ -XXX,XX +XXX,XX @@ static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
             addr += 3 * msize;
         } while (i & 15);
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 }
 
 static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
@@ -XXX,XX +XXX,XX @@ static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
             addr += 4 * msize;
         } while (i & 15);
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 }
 
 #define DO_STN_1(N, NAME, ESIZE) \
@@ -XXX,XX +XXX,XX @@ static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
             i += 4, pg >>= 4;
         } while (i & 15);
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 
     /* Wait until all exceptions have been raised to write back.  */
     memcpy(vd, &scratch, oprsz);
@@ -XXX,XX +XXX,XX @@ static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
             tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra);
         }
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 
     /* Wait until all exceptions have been raised to write back.  */
     memcpy(vd, &scratch, oprsz * 8);
@@ -XXX,XX +XXX,XX @@ static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
         tlb_fn(env, vd, reg_off, addr, oi, ra);
 
         /* The rest of the reads will be non-faulting.  */
-        set_helper_retaddr(0);
+        clear_helper_retaddr();
     }
 
     /* After any fault, zero the leading predicated false elements.  */
@@ -XXX,XX +XXX,XX @@ static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
         tlb_fn(env, vd, reg_off, addr, oi, ra);
 
         /* The rest of the reads will be non-faulting.  */
-        set_helper_retaddr(0);
+        clear_helper_retaddr();
     }
 
     /* After any fault, zero the leading predicated false elements.  */
@@ -XXX,XX +XXX,XX @@ static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
             i += 4, pg >>= 4;
         } while (i & 15);
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 }
 
 static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
@@ -XXX,XX +XXX,XX @@ static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
             tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra);
         }
     }
-    set_helper_retaddr(0);
+    clear_helper_retaddr();
 }
 
 #define DO_ST1_ZPZ_S(MEM, OFS) \
-- 
2.17.1

These functions are not used, and are not usable in the
context of code generation, because we never have a helper
return address to pass in to them.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu_ldst_useronly_template.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

Turn helper_retaddr into a multi-state flag that may now also
indicate when we're performing a read on behalf of the translator.
In this case, release the mmap_lock before the longjmp back to
the main cpu loop, and thereby avoid a failing assert therein.

Fixes: https://bugs.launchpad.net/qemu/+bug/1832353
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu_ldst_useronly_template.h | 20 +++++--
 accel/tcg/user-exec.c                     | 66 ++++++++++++++++-------
 2 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/include/exec/cpu_ldst_useronly_template.h b/include/exec/cpu_ldst_useronly_template.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu_ldst_useronly_template.h
+++ b/include/exec/cpu_ldst_useronly_template.h
@@ -XXX,XX +XXX,XX @@
 static inline RES_TYPE
 glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr)
 {
-#if !defined(CODE_ACCESS)
+#ifdef CODE_ACCESS
+    RES_TYPE ret;
+    set_helper_retaddr(1);
+    ret = glue(glue(ld, USUFFIX), _p)(g2h(ptr));
+    clear_helper_retaddr();
+    return ret;
+#else
     trace_guest_mem_before_exec(
         env_cpu(env), ptr,
         trace_mem_build_info(SHIFT, false, MO_TE, false));
-#endif
     return glue(glue(ld, USUFFIX), _p)(g2h(ptr));
+#endif
 }
 
 #ifndef CODE_ACCESS
@@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
 static inline int
 glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr)
 {
-#if !defined(CODE_ACCESS)
+#ifdef CODE_ACCESS
+    int ret;
+    set_helper_retaddr(1);
+    ret = glue(glue(lds, SUFFIX), _p)(g2h(ptr));
+    clear_helper_retaddr();
+    return ret;
+#else
     trace_guest_mem_before_exec(
         env_cpu(env), ptr,
         trace_mem_build_info(SHIFT, true, MO_TE, false));
-#endif
     return glue(glue(lds, SUFFIX), _p)(g2h(ptr));
+#endif
 }
 
 #ifndef CODE_ACCESS
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -XXX,XX +XXX,XX @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
     CPUState *cpu = current_cpu;
     CPUClass *cc;
     unsigned long address = (unsigned long)info->si_addr;
-    MMUAccessType access_type;
+    MMUAccessType access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
 
-    /* We must handle PC addresses from two different sources:
-     * a call return address and a signal frame address.
-     *
-     * Within cpu_restore_state_from_tb we assume the former and adjust
-     * the address by -GETPC_ADJ so that the address is within the call
-     * insn so that addr does not accidentally match the beginning of the
-     * next guest insn.
-     *
-     * However, when the PC comes from the signal frame, it points to
-     * the actual faulting host insn and not a call insn.  Subtracting
-     * GETPC_ADJ in that case may accidentally match the previous guest insn.
-     *
-     * So for the later case, adjust forward to compensate for what
-     * will be done later by cpu_restore_state_from_tb.
-     */
-    if (helper_retaddr) {
+    switch (helper_retaddr) {
+    default:
+        /*
+         * Fault during host memory operation within a helper function.
+         * The helper's host return address, saved here, gives us a
+         * pointer into the generated code that will unwind to the
+         * correct guest pc.
+         */
         pc = helper_retaddr;
-    } else {
+        break;
+
+    case 0:
+        /*
+         * Fault during host memory operation within generated code.
+         * (Or, an unrelated bug within qemu, but we can't tell from here).
+         *
+         * We take the host pc from the signal frame.  However, we cannot
+         * use that value directly.  Within cpu_restore_state_from_tb, we
+         * assume PC comes from GETPC(), as used by the helper functions,
+         * so we adjust the address by -GETPC_ADJ to form an address that
+         * is within the call insn, so that the address does not accidentally
+         * match the beginning of the next guest insn.  However, when the
+         * pc comes from the signal frame it points to the actual faulting
+         * host memory insn and not the return from a call insn.
+         *
+         * Therefore, adjust to compensate for what will be done later
+         * by cpu_restore_state_from_tb.
+         */
         pc += GETPC_ADJ;
+        break;
+
+    case 1:
+        /*
+         * Fault during host read for translation, or loosely, "execution".
+         *
+         * The guest pc is already pointing to the start of the TB for which
+         * code is being generated.  If the guest translator manages the
+         * page crossings correctly, this is exactly the correct address
+         * (and if the translator doesn't handle page boundaries correctly
+         * there's little we can do about that here).  Therefore, do not
+         * trigger the unwinder.
+         *
+         * Like tb_gen_code, release the memory lock before cpu_loop_exit.
+         */
+        pc = 0;
+        access_type = MMU_INST_FETCH;
+        mmap_unlock();
+        break;
     }
 
     /* For synchronous signals we expect to be coming from the vCPU
@@ -XXX,XX +XXX,XX @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
     clear_helper_retaddr();
 
     cc = CPU_GET_CLASS(cpu);
-    access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
     cc->tlb_fill(cpu, address, 0, access_type, MMU_USER_IDX, false, pc);
     g_assert_not_reached();
 }
-- 
2.17.1

The following changes since commit 2ecfc0657afa5d29a373271b342f704a1a3c6737:

Merge remote-tracking branch 'remotes/armbru/tags/pull-misc-2020-12-10' into staging (2020-12-10 17:01:05 +0000)

are available in the Git repository at:

https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20201210

for you to fetch changes up to 9e2658d62ebc23efe7df43fc0e306f129510d874:

accel/tcg: rename tcg-cpus functions to match module name (2020-12-10 17:44:10 -0600)

----------------------------------------------------------------
Split CpusAccel for tcg variants

----------------------------------------------------------------
Claudio Fontana (3):
      accel/tcg: split CpusAccel into three TCG variants
      accel/tcg: split tcg_start_vcpu_thread
      accel/tcg: rename tcg-cpus functions to match module name

 accel/tcg/tcg-cpus-icount.h |  17 ++
 accel/tcg/tcg-cpus-rr.h     |  21 ++
 accel/tcg/tcg-cpus.h        |  12 +-
 accel/tcg/tcg-all.c         |  13 +-
 accel/tcg/tcg-cpus-icount.c | 147 +++++++++++++
 accel/tcg/tcg-cpus-mttcg.c  | 140 ++++++++++++
 accel/tcg/tcg-cpus-rr.c     | 305 ++++++++++++++++++++++++++
 accel/tcg/tcg-cpus.c        | 506 +-------------------------------------------
 softmmu/icount.c            |   2 +-
 accel/tcg/meson.build       |   9 +-
 10 files changed, 670 insertions(+), 502 deletions(-)
 create mode 100644 accel/tcg/tcg-cpus-icount.h
 create mode 100644 accel/tcg/tcg-cpus-rr.h
 create mode 100644 accel/tcg/tcg-cpus-icount.c
 create mode 100644 accel/tcg/tcg-cpus-mttcg.c
 create mode 100644 accel/tcg/tcg-cpus-rr.c

From: Claudio Fontana <cfontana@suse.de>

split up the CpusAccel tcg_cpus into three TCG variants:

tcg_cpus_rr (single threaded, round robin cpus)
tcg_cpus_icount (same as rr, but with instruction counting enabled)
tcg_cpus_mttcg (multi-threaded cpus)

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Claudio Fontana <cfontana@suse.de>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20201015143217.29337-2-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-cpus-icount.h |  17 ++
 accel/tcg/tcg-cpus-mttcg.h  |  21 ++
 accel/tcg/tcg-cpus-rr.h     |  20 ++
 accel/tcg/tcg-cpus.h        |  13 +-
 accel/tcg/tcg-all.c         |   8 +-
 accel/tcg/tcg-cpus-icount.c | 147 +++++++++++
 accel/tcg/tcg-cpus-mttcg.c  | 117 +++++++++
 accel/tcg/tcg-cpus-rr.c     | 270 ++++++++++++++++++++
 accel/tcg/tcg-cpus.c        | 484 ++----------------------------------
 softmmu/icount.c            |   2 +-
 accel/tcg/meson.build       |   9 +-
 11 files changed, 646 insertions(+), 462 deletions(-)
 create mode 100644 accel/tcg/tcg-cpus-icount.h
 create mode 100644 accel/tcg/tcg-cpus-mttcg.h
 create mode 100644 accel/tcg/tcg-cpus-rr.h
 create mode 100644 accel/tcg/tcg-cpus-icount.c
 create mode 100644 accel/tcg/tcg-cpus-mttcg.c
 create mode 100644 accel/tcg/tcg-cpus-rr.c

diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-icount.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation using instruction counting
+ *
+ * Copyright 2020 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TCG_CPUS_ICOUNT_H
+#define TCG_CPUS_ICOUNT_H
+
+void handle_icount_deadline(void);
+void prepare_icount_for_run(CPUState *cpu);
+void process_icount_data(CPUState *cpu);
+
+#endif /* TCG_CPUS_ICOUNT_H */
diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-mttcg.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Multi Threaded vCPUs implementation
+ *
+ * Copyright 2020 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TCG_CPUS_MTTCG_H
+#define TCG_CPUS_MTTCG_H
+
+/*
+ * In the multi-threaded case each vCPU has its own thread. The TLS
+ * variable current_cpu can be used deep in the code to find the
+ * current CPUState for a given thread.
+ */
+
+void *tcg_cpu_thread_fn(void *arg);
+
+#endif /* TCG_CPUS_MTTCG_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation
+ *
+ * Copyright 2020 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TCG_CPUS_RR_H
+#define TCG_CPUS_RR_H
+
+#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
+
+/* Kick all RR vCPUs. */
+void qemu_cpu_kick_rr_cpus(CPUState *unused);
+
+void *tcg_rr_cpu_thread_fn(void *arg);
+
+#endif /* TCG_CPUS_RR_H */
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@
 /*
- * Accelerator CPUS Interface
+ * QEMU TCG vCPU common functionality
+ *
+ * Functionality common to all TCG vcpu variants: mttcg, rr and icount.
  *
  * Copyright 2020 SUSE LLC
  *
@@ -XXX,XX +XXX,XX @@
 
 #include "sysemu/cpus.h"
 
-extern const CpusAccel tcg_cpus;
+extern const CpusAccel tcg_cpus_mttcg;
+extern const CpusAccel tcg_cpus_icount;
+extern const CpusAccel tcg_cpus_rr;
+
+void tcg_start_vcpu_thread(CPUState *cpu);
+void qemu_tcg_destroy_vcpu(CPUState *cpu);
+int tcg_cpu_exec(CPUState *cpu);
+void tcg_handle_interrupt(CPUState *cpu, int mask);
 
 #endif /* TCG_CPUS_H */
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
 
     tcg_exec_init(s->tb_size * 1024 * 1024);
     mttcg_enabled = s->mttcg_enabled;
-    cpus_register_accel(&tcg_cpus);
 
+    if (mttcg_enabled) {
+        cpus_register_accel(&tcg_cpus_mttcg);
+    } else if (icount_enabled()) {
+        cpus_register_accel(&tcg_cpus_icount);
+    } else {
+        cpus_register_accel(&tcg_cpus_rr);
+    }
     return 0;
 }
 
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation using instruction counting
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/tcg.h"
+#include "sysemu/replay.h"
+#include "qemu/main-loop.h"
+#include "qemu/guest-random.h"
+#include "exec/exec-all.h"
+#include "hw/boards.h"
+
+#include "tcg-cpus.h"
+#include "tcg-cpus-icount.h"
+#include "tcg-cpus-rr.h"
+
+static int64_t tcg_get_icount_limit(void)
+{
+    int64_t deadline;
+
+    if (replay_mode != REPLAY_MODE_PLAY) {
+        /*
+         * Include all the timers, because they may need attention.
+         * Too long CPU execution may create unnecessary delay in UI.
+         */
+        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                              QEMU_TIMER_ATTR_ALL);
+        /* Check realtime timers, because they help with input processing */
+        deadline = qemu_soonest_timeout(deadline,
+                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
+                                           QEMU_TIMER_ATTR_ALL));
+
+        /*
+         * Maintain prior (possibly buggy) behaviour where if no deadline
+         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
+         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
+         * nanoseconds.
+         */
+        if ((deadline < 0) || (deadline > INT32_MAX)) {
+            deadline = INT32_MAX;
+        }
+
+        return icount_round(deadline);
+    } else {
+        return replay_get_instructions();
+    }
+}
+
+static void notify_aio_contexts(void)
+{
+    /* Wake up other AioContexts.  */
+    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+    qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
+}
+
+void handle_icount_deadline(void)
+{
+    assert(qemu_in_vcpu_thread());
+    int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                                  QEMU_TIMER_ATTR_ALL);
+
+    if (deadline == 0) {
+        notify_aio_contexts();
+    }
+}
+
+void prepare_icount_for_run(CPUState *cpu)
+{
+    int insns_left;
+
+    /*
+     * These should always be cleared by process_icount_data after
+     * each vCPU execution. However u16.high can be raised
+     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
+     */
+    g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
+    g_assert(cpu->icount_extra == 0);
+
+    cpu->icount_budget = tcg_get_icount_limit();
+    insns_left = MIN(0xffff, cpu->icount_budget);
+    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
+    cpu->icount_extra = cpu->icount_budget - insns_left;
+
+    replay_mutex_lock();
+
+    if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
+        notify_aio_contexts();
+    }
+}
+
+void process_icount_data(CPUState *cpu)
+{
+    /* Account for executed instructions */
+    icount_update(cpu);
+
+    /* Reset the counters */
+    cpu_neg(cpu)->icount_decr.u16.low = 0;
+    cpu->icount_extra = 0;
+    cpu->icount_budget = 0;
+
+    replay_account_executed_instructions();
+
+    replay_mutex_unlock();
+}
+
+static void icount_handle_interrupt(CPUState *cpu, int mask)
+{
+    int old_mask = cpu->interrupt_request;
+
+    tcg_handle_interrupt(cpu, mask);
+    if (qemu_cpu_is_self(cpu) &&
+        !cpu->can_do_io
+        && (mask & ~old_mask) != 0) {
+        cpu_abort(cpu, "Raised interrupt while not in I/O function");
+    }
+}
+
+const CpusAccel tcg_cpus_icount = {
+    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+
+    .handle_interrupt = icount_handle_interrupt,
+    .get_virtual_clock = icount_get,
+    .get_elapsed_ticks = icount_get,
+};
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Multi Threaded vCPUs implementation
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/tcg.h"
+#include "sysemu/replay.h"
+#include "qemu/main-loop.h"
+#include "qemu/guest-random.h"
+#include "exec/exec-all.h"
+#include "hw/boards.h"
+
+#include "tcg-cpus.h"
+#include "tcg-cpus-mttcg.h"
+
+/*
+ * In the multi-threaded case each vCPU has its own thread. The TLS
+ * variable current_cpu can be used deep in the code to find the
+ * current CPUState for a given thread.
+ */
+
+void *tcg_cpu_thread_fn(void *arg)
+{
+    CPUState *cpu = arg;
+
+    assert(tcg_enabled());
+    g_assert(!icount_enabled());
+
+    rcu_register_thread();
+    tcg_register_thread();
+
+    qemu_mutex_lock_iothread();
+    qemu_thread_get_self(cpu->thread);
+
+    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;
+    current_cpu = cpu;
+    cpu_thread_signal_created(cpu);
+    qemu_guest_random_seed_thread_part2(cpu->random_seed);
+
+    /* process any pending work */
+    cpu->exit_request = 1;
+
+    do {
+        if (cpu_can_run(cpu)) {
+            int r;
+            qemu_mutex_unlock_iothread();
+            r = tcg_cpu_exec(cpu);
+            qemu_mutex_lock_iothread();
+            switch (r) {
+            case EXCP_DEBUG:
+                cpu_handle_guest_debug(cpu);
+                break;
+            case EXCP_HALTED:
+                /*
+                 * during start-up the vCPU is reset and the thread is
+                 * kicked several times. If we don't ensure we go back
+                 * to sleep in the halted state we won't cleanly
+                 * start-up when the vCPU is enabled.
+                 *
+                 * cpu->halted should ensure we sleep in wait_io_event
+                 */
+                g_assert(cpu->halted);
+                break;
+            case EXCP_ATOMIC:
+                qemu_mutex_unlock_iothread();
+                cpu_exec_step_atomic(cpu);
+                qemu_mutex_lock_iothread();
+            default:
+                /* Ignore everything else? */
+                break;
+            }
+        }
+
+        qatomic_mb_set(&cpu->exit_request, 0);
+        qemu_wait_io_event(cpu);
+    } while (!cpu->unplug || cpu_can_run(cpu));
+
+    qemu_tcg_destroy_vcpu(cpu);
+    qemu_mutex_unlock_iothread();
+    rcu_unregister_thread();
+    return NULL;
+}
+
+static void mttcg_kick_vcpu_thread(CPUState *cpu)
+{
+    cpu_exit(cpu);
+}
+
+const CpusAccel tcg_cpus_mttcg = {
+    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .kick_vcpu_thread = mttcg_kick_vcpu_thread,
+
+    .handle_interrupt = tcg_handle_interrupt,
+};
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/tcg.h"
+#include "sysemu/replay.h"
+#include "qemu/main-loop.h"
+#include "qemu/guest-random.h"
+#include "exec/exec-all.h"
+#include "hw/boards.h"
+
+#include "tcg-cpus.h"
+#include "tcg-cpus-rr.h"
+#include "tcg-cpus-icount.h"
+
+/* Kick all RR vCPUs */
+void qemu_cpu_kick_rr_cpus(CPUState *unused)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        cpu_exit(cpu);
+    };
+}
+
+/*
+ * TCG vCPU kick timer
+ *
+ * The kick timer is responsible for moving single threaded vCPU
+ * emulation on to the next vCPU. If more than one vCPU is running a
+ * timer event will force a cpu->exit so the next vCPU can get
+ * scheduled.
+ *
+ * The timer is removed if all vCPUs are idle and restarted again once
+ * idleness is complete.
+ */
+
+static QEMUTimer *tcg_kick_vcpu_timer;
+static CPUState *tcg_current_rr_cpu;
+
+#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
+
+static inline int64_t qemu_tcg_next_kick(void)
+{
+    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
+}
+
+/* Kick the currently round-robin scheduled vCPU to next */
+static void qemu_cpu_kick_rr_next_cpu(void)
+{
+    CPUState *cpu;
+    do {
+        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
+        if (cpu) {
+            cpu_exit(cpu);
+        }
+    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
+}
+
+static void kick_tcg_thread(void *opaque)
+{
+    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+    qemu_cpu_kick_rr_next_cpu();
+}
+
+static void start_tcg_kick_timer(void)
+{
+    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
+        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                           kick_tcg_thread, NULL);
+    }
+    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
+        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+    }
+}
+
+static void stop_tcg_kick_timer(void)
+{
+    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
+        timer_del(tcg_kick_vcpu_timer);
+    }
+}
+
+static void qemu_tcg_rr_wait_io_event(void)
+{
+    CPUState *cpu;
+
+    while (all_cpu_threads_idle()) {
+        stop_tcg_kick_timer();
+        qemu_cond_wait_iothread(first_cpu->halt_cond);
+    }
+
+    start_tcg_kick_timer();
+
+    CPU_FOREACH(cpu) {
+        qemu_wait_io_event_common(cpu);
+    }
+}
+
+/*
+ * Destroy any remaining vCPUs which have been unplugged and have
+ * finished running
+ */
+static void deal_with_unplugged_cpus(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        if (cpu->unplug && !cpu_can_run(cpu)) {
+            qemu_tcg_destroy_vcpu(cpu);
+            break;
+        }
+    }
+}
+
+/*
+ * In the single-threaded case each vCPU is simulated in turn. If
+ * there is more than a single vCPU we create a simple timer to kick
+ * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
+ * This is done explicitly rather than relying on side-effects
+ * elsewhere.
+ */
+
+void *tcg_rr_cpu_thread_fn(void *arg)
+{
+    CPUState *cpu = arg;
+
+    assert(tcg_enabled());
+    rcu_register_thread();
+    tcg_register_thread();
+
+    qemu_mutex_lock_iothread();
+    qemu_thread_get_self(cpu->thread);
+
+    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;
+    cpu_thread_signal_created(cpu);
+    qemu_guest_random_seed_thread_part2(cpu->random_seed);
+
+    /* wait for initial kick-off after machine start */
+    while (first_cpu->stopped) {
+        qemu_cond_wait_iothread(first_cpu->halt_cond);
+
+        /* process any pending work */
+        CPU_FOREACH(cpu) {
+            current_cpu = cpu;
+            qemu_wait_io_event_common(cpu);
+        }
+    }
+
+    start_tcg_kick_timer();
+
+    cpu = first_cpu;
+
+    /* process any pending work */
+    cpu->exit_request = 1;
+
+    while (1) {
+        qemu_mutex_unlock_iothread();
+        replay_mutex_lock();
+        qemu_mutex_lock_iothread();
+
+        if (icount_enabled()) {
+            /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
+            icount_account_warp_timer();
+            /*
+             * Run the timers here.  This is much more efficient than
+             * waking up the I/O thread and waiting for completion.
+             */
+            handle_icount_deadline();
+        }
+
+        replay_mutex_unlock();
+
+        if (!cpu) {
+            cpu = first_cpu;
+        }
+
+        while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
+
+            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
+            current_cpu = cpu;
+
+            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
+                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
+
+            if (cpu_can_run(cpu)) {
+                int r;
+
+                qemu_mutex_unlock_iothread();
+                if (icount_enabled()) {
+                    prepare_icount_for_run(cpu);
+                }
+                r = tcg_cpu_exec(cpu);
+                if (icount_enabled()) {
+                    process_icount_data(cpu);
+                }
+                qemu_mutex_lock_iothread();
+
+                if (r == EXCP_DEBUG) {
+                    cpu_handle_guest_debug(cpu);
+                    break;
+                } else if (r == EXCP_ATOMIC) {
+                    qemu_mutex_unlock_iothread();
+                    cpu_exec_step_atomic(cpu);
+                    qemu_mutex_lock_iothread();
+                    break;
+                }
+            } else if (cpu->stop) {
+                if (cpu->unplug) {
+                    cpu = CPU_NEXT(cpu);
+                }
+                break;
+            }
+
+            cpu = CPU_NEXT(cpu);
+        } /* while (cpu && !cpu->exit_request).. */
+
+        /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
+        qatomic_set(&tcg_current_rr_cpu, NULL);
+
+        if (cpu && cpu->exit_request) {
+            qatomic_mb_set(&cpu->exit_request, 0);
+        }
+
+        if (icount_enabled() && all_cpu_threads_idle()) {
+            /*
+             * When all cpus are sleeping (e.g. in WFI), to avoid a deadlock
+             * in the main_loop, wake it up in order to start the warp timer.
+             */
+            qemu_notify_event();
+        }
+
+        qemu_tcg_rr_wait_io_event();
+        deal_with_unplugged_cpus();
+    }
+
+    rcu_unregister_thread();
+    return NULL;
+}
+
+const CpusAccel tcg_cpus_rr = {
+    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+
+    .handle_interrupt = tcg_handle_interrupt,
+};
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
 /*
- * QEMU System Emulator
+ * QEMU TCG vCPU common functionality
+ *
+ * Functionality common to all TCG vCPU variants: mttcg, rr and icount.
  *
  * Copyright (c) 2003-2008 Fabrice Bellard
  * Copyright (c) 2014 Red Hat Inc.
@@ -XXX,XX +XXX,XX @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
+#include "tcg-cpus-mttcg.h"
+#include "tcg-cpus-rr.h"
 
-/* Kick all RR vCPUs */
-static void qemu_cpu_kick_rr_cpus(void)
-{
-    CPUState *cpu;
+/* common functionality among all TCG variants */
 
-    CPU_FOREACH(cpu) {
-        cpu_exit(cpu);
-    };
-}
-
-static void tcg_kick_vcpu_thread(CPUState *cpu)
-{
-    if (qemu_tcg_mttcg_enabled()) {
-        cpu_exit(cpu);
-    } else {
-        qemu_cpu_kick_rr_cpus();
-    }
-}
-
-/*
- * TCG vCPU kick timer
- *
- * The kick timer is responsible for moving single threaded vCPU
- * emulation on to the next vCPU. If more than one vCPU is running a
- * timer event with force a cpu->exit so the next vCPU can get
- * scheduled.
- *
- * The timer is removed if all vCPUs are idle and restarted again once
- * idleness is complete.
- */
-
-static QEMUTimer *tcg_kick_vcpu_timer;
-static CPUState *tcg_current_rr_cpu;
-
-#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
-
-static inline int64_t qemu_tcg_next_kick(void)
-{
-    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
-}
-
-/* Kick the currently round-robin scheduled vCPU to next */
-static void qemu_cpu_kick_rr_next_cpu(void)
-{
-    CPUState *cpu;
-    do {
-        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
-        if (cpu) {
-            cpu_exit(cpu);
-        }
-    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
-}
-
-static void kick_tcg_thread(void *opaque)
-{
-    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
-    qemu_cpu_kick_rr_next_cpu();
-}
-
-static void start_tcg_kick_timer(void)
-{
-    assert(!mttcg_enabled);
-    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
-        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                           kick_tcg_thread, NULL);
-    }
-    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
-        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
-    }
-}
-
-static void stop_tcg_kick_timer(void)
-{
-    assert(!mttcg_enabled);
-    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
-        timer_del(tcg_kick_vcpu_timer);
-    }
-}
-
-static void qemu_tcg_destroy_vcpu(CPUState *cpu)
-{
-}
-
-static void qemu_tcg_rr_wait_io_event(void)
-{
-    CPUState *cpu;
-
-    while (all_cpu_threads_idle()) {
-        stop_tcg_kick_timer();
-        qemu_cond_wait_iothread(first_cpu->halt_cond);
-    }
-
-    start_tcg_kick_timer();
-
-    CPU_FOREACH(cpu) {
-        qemu_wait_io_event_common(cpu);
-    }
-}
-
-static int64_t tcg_get_icount_limit(void)
-{
-    int64_t deadline;
-
-    if (replay_mode != REPLAY_MODE_PLAY) {
-        /*
-         * Include all the timers, because they may need an attention.
-         * Too long CPU execution may create unnecessary delay in UI.
-         */
-        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
-                                              QEMU_TIMER_ATTR_ALL);
-        /* Check realtime timers, because they help with input processing */
-        deadline = qemu_soonest_timeout(deadline,
-                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
-                                           QEMU_TIMER_ATTR_ALL));
-
-        /*
-         * Maintain prior (possibly buggy) behaviour where if no deadline
-         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
-         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
-         * nanoseconds.
-         */
-        if ((deadline < 0) || (deadline > INT32_MAX)) {
-            deadline = INT32_MAX;
-        }
-
-        return icount_round(deadline);
-    } else {
-        return replay_get_instructions();
-    }
-}
-
-static void notify_aio_contexts(void)
-{
-    /* Wake up other AioContexts.  */
-    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-    qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
-}
-
-static void handle_icount_deadline(void)
-{
-    assert(qemu_in_vcpu_thread());
-    if (icount_enabled()) {
-        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
-                                                      QEMU_TIMER_ATTR_ALL);
-
-        if (deadline == 0) {
-            notify_aio_contexts();
-        }
-    }
-}
-
-static void prepare_icount_for_run(CPUState *cpu)
-{
-    if (icount_enabled()) {
-        int insns_left;
-
-        /*
-         * These should always be cleared by process_icount_data after
-         * each vCPU execution. However u16.high can be raised
-         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
-         */
-        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
-        g_assert(cpu->icount_extra == 0);
-
-        cpu->icount_budget = tcg_get_icount_limit();
-        insns_left = MIN(0xffff, cpu->icount_budget);
-        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
-        cpu->icount_extra = cpu->icount_budget - insns_left;
-
-        replay_mutex_lock();
-
-        if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
-            notify_aio_contexts();
-        }
-    }
-}
-
-static void process_icount_data(CPUState *cpu)
-{
-    if (icount_enabled()) {
-        /* Account for executed instructions */
-        icount_update(cpu);
-
-        /* Reset the counters */
-        cpu_neg(cpu)->icount_decr.u16.low = 0;
-        cpu->icount_extra = 0;
-        cpu->icount_budget = 0;
-
-        replay_account_executed_instructions();
-
-        replay_mutex_unlock();
-    }
-}
-
-static int tcg_cpu_exec(CPUState *cpu)
-{
-    int ret;
-#ifdef CONFIG_PROFILER
-    int64_t ti;
-#endif
-
-    assert(tcg_enabled());
-#ifdef CONFIG_PROFILER
-    ti = profile_getclock();
-#endif
-    cpu_exec_start(cpu);
-    ret = cpu_exec(cpu);
-    cpu_exec_end(cpu);
-#ifdef CONFIG_PROFILER
-    qatomic_set(&tcg_ctx->prof.cpu_exec_time,
-                tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
-#endif
-    return ret;
-}
-
-/*
- * Destroy any remaining vCPUs which have been unplugged and have
- * finished running
- */
-static void deal_with_unplugged_cpus(void)
-{
-    CPUState *cpu;
-
-    CPU_FOREACH(cpu) {
-        if (cpu->unplug && !cpu_can_run(cpu)) {
-            qemu_tcg_destroy_vcpu(cpu);
-            cpu_thread_signal_destroyed(cpu);
-            break;
-        }
-    }
-}
-
-/*
- * Single-threaded TCG
- *
- * In the single-threaded case each vCPU is simulated in turn. If
- * there is more than a single vCPU we create a simple timer to kick
- * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
- * This is done explicitly rather than relying on side-effects
- * elsewhere.
- */
-
-static void *tcg_rr_cpu_thread_fn(void *arg)
-{
-    CPUState *cpu = arg;
-
-    assert(tcg_enabled());
-    rcu_register_thread();
-    tcg_register_thread();
-
-    qemu_mutex_lock_iothread();
-    qemu_thread_get_self(cpu->thread);
-
-    cpu->thread_id = qemu_get_thread_id();
-    cpu->can_do_io = 1;
-    cpu_thread_signal_created(cpu);
-    qemu_guest_random_seed_thread_part2(cpu->random_seed);
-
-    /* wait for initial kick-off after machine start */
-    while (first_cpu->stopped) {
-        qemu_cond_wait_iothread(first_cpu->halt_cond);
-
-        /* process any pending work */
-        CPU_FOREACH(cpu) {
-            current_cpu = cpu;
-            qemu_wait_io_event_common(cpu);
-        }
-    }
-
-    start_tcg_kick_timer();
-
-    cpu = first_cpu;
-
-    /* process any pending work */
-    cpu->exit_request = 1;
-
-    while (1) {
-        qemu_mutex_unlock_iothread();
-        replay_mutex_lock();
-        qemu_mutex_lock_iothread();
-        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
-        icount_account_warp_timer();
-
-        /*
-         * Run the timers here.  This is much more efficient than
-         * waking up the I/O thread and waiting for completion.
-         */
-        handle_icount_deadline();
-
-        replay_mutex_unlock();
-
-        if (!cpu) {
-            cpu = first_cpu;
-        }
-
-        while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
-
-            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
-            current_cpu = cpu;
-
-            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
-                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
-
-            if (cpu_can_run(cpu)) {
-                int r;
-
-                qemu_mutex_unlock_iothread();
-                prepare_icount_for_run(cpu);
-
-                r = tcg_cpu_exec(cpu);
-
-                process_icount_data(cpu);
-                qemu_mutex_lock_iothread();
-
-                if (r == EXCP_DEBUG) {
-                    cpu_handle_guest_debug(cpu);
-                    break;
-                } else if (r == EXCP_ATOMIC) {
-                    qemu_mutex_unlock_iothread();
-                    cpu_exec_step_atomic(cpu);
-                    qemu_mutex_lock_iothread();
-                    break;
-                }
-            } else if (cpu->stop) {
-                if (cpu->unplug) {
-                    cpu = CPU_NEXT(cpu);
-                }
-                break;
-            }
-
-            cpu = CPU_NEXT(cpu);
-        } /* while (cpu && !cpu->exit_request).. */
-
-        /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
-        qatomic_set(&tcg_current_rr_cpu, NULL);
-
-        if (cpu && cpu->exit_request) {
-            qatomic_mb_set(&cpu->exit_request, 0);
-        }
-
-        if (icount_enabled() && all_cpu_threads_idle()) {
-            /*
-             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
-             * in the main_loop, wake it up in order to start the warp timer.
-             */
-            qemu_notify_event();
-        }
-
-        qemu_tcg_rr_wait_io_event();
-        deal_with_unplugged_cpus();
-    }
-
-    rcu_unregister_thread();
-    return NULL;
-}
-
-/*
- * Multi-threaded TCG
- *
- * In the multi-threaded case each vCPU has its own thread. The TLS
- * variable current_cpu can be used deep in the code to find the
- * current CPUState for a given thread.
- */
-
-static void *tcg_cpu_thread_fn(void *arg)
-{
-    CPUState *cpu = arg;
-
-    assert(tcg_enabled());
-    g_assert(!icount_enabled());
-
-    rcu_register_thread();
-    tcg_register_thread();
-
-    qemu_mutex_lock_iothread();
-    qemu_thread_get_self(cpu->thread);
-
-    cpu->thread_id = qemu_get_thread_id();
-    cpu->can_do_io = 1;
-    current_cpu = cpu;
-    cpu_thread_signal_created(cpu);
-    qemu_guest_random_seed_thread_part2(cpu->random_seed);
-
-    /* process any pending work */
-    cpu->exit_request = 1;
-
-    do {
-        if (cpu_can_run(cpu)) {
-            int r;
-            qemu_mutex_unlock_iothread();
-            r = tcg_cpu_exec(cpu);
-            qemu_mutex_lock_iothread();
-            switch (r) {
-            case EXCP_DEBUG:
-                cpu_handle_guest_debug(cpu);
-                break;
-            case EXCP_HALTED:
-                /*
-                 * during start-up the vCPU is reset and the thread is
-                 * kicked several times. If we don't ensure we go back
-                 * to sleep in the halted state we won't cleanly
-                 * start-up when the vCPU is enabled.
-                 *
-                 * cpu->halted should ensure we sleep in wait_io_event
-                 */
-                g_assert(cpu->halted);
-                break;
-            case EXCP_ATOMIC:
-                qemu_mutex_unlock_iothread();
-                cpu_exec_step_atomic(cpu);
-                qemu_mutex_lock_iothread();
-            default:
-                /* Ignore everything else? */
-                break;
-            }
-        }
-
-        qatomic_mb_set(&cpu->exit_request, 0);
-        qemu_wait_io_event(cpu);
-    } while (!cpu->unplug || cpu_can_run(cpu));
-
-    qemu_tcg_destroy_vcpu(cpu);
-    cpu_thread_signal_destroyed(cpu);
-    qemu_mutex_unlock_iothread();
-    rcu_unregister_thread();
-    return NULL;
-}
-
-static void tcg_start_vcpu_thread(CPUState *cpu)
+void tcg_start_vcpu_thread(CPUState *cpu)
 {
     char thread_name[VCPU_THREAD_NAME_SIZE];
     static QemuCond *single_tcg_halt_cond;
@@ -XXX,XX +XXX,XX @@ static void tcg_start_vcpu_thread(CPUState *cpu)
     }
 }
 
-static int64_t tcg_get_virtual_clock(void)
+void qemu_tcg_destroy_vcpu(CPUState *cpu)
 {
-    if (icount_enabled()) {
-        return icount_get();
-    }
-    return cpu_get_clock();
+    cpu_thread_signal_destroyed(cpu);
 }
 
-static int64_t tcg_get_elapsed_ticks(void)
+int tcg_cpu_exec(CPUState *cpu)
 {
-    if (icount_enabled()) {
-        return icount_get();
-    }
-    return cpu_get_ticks();
+    int ret;
+#ifdef CONFIG_PROFILER
+    int64_t ti;
+#endif
+    assert(tcg_enabled());
+#ifdef CONFIG_PROFILER
+    ti = profile_getclock();
+#endif
+    cpu_exec_start(cpu);
+    ret = cpu_exec(cpu);
+    cpu_exec_end(cpu);
+#ifdef CONFIG_PROFILER
+    qatomic_set(&tcg_ctx->prof.cpu_exec_time,
+                tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
+#endif
+    return ret;
 }
 
 /* mask must never be zero, except for A20 change call */
-static void tcg_handle_interrupt(CPUState *cpu, int mask)
+void tcg_handle_interrupt(CPUState *cpu, int mask)
 {
-    int old_mask;
     g_assert(qemu_mutex_iothread_locked());
 
-    old_mask = cpu->interrupt_request;
     cpu->interrupt_request |= mask;
 
     /*
@@ -XXX,XX +XXX,XX @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
         qemu_cpu_kick(cpu);
     } else {
         qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
-        if (icount_enabled() &&
-            !cpu->can_do_io
-            && (mask & ~old_mask) != 0) {
-            cpu_abort(cpu, "Raised interrupt while not in I/O function");
-        }
     }
 }
-
-const CpusAccel tcg_cpus = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
-    .kick_vcpu_thread = tcg_kick_vcpu_thread,
-
-    .handle_interrupt = tcg_handle_interrupt,
-
-    .get_virtual_clock = tcg_get_virtual_clock,
-    .get_elapsed_ticks = tcg_get_elapsed_ticks,
-};
diff --git a/softmmu/icount.c b/softmmu/icount.c
index XXXXXXX..XXXXXXX 100644
--- a/softmmu/icount.c
+++ b/softmmu/icount.c
@@ -XXX,XX +XXX,XX @@ void icount_start_warp_timer(void)
 
 void icount_account_warp_timer(void)
 {
-    if (!icount_enabled() || !icount_sleep) {
+    if (!icount_sleep) {
         return;
     }
 
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
 tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl])
 specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
 
-specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files('tcg-all.c', 'cputlb.c', 'tcg-cpus.c'))
+specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
+  'tcg-all.c',
+  'cputlb.c',
+  'tcg-cpus.c',
+  'tcg-cpus-mttcg.c',
+  'tcg-cpus-icount.c',
+  'tcg-cpus-rr.c'
+))
-- 
2.25.1

From: Claudio Fontana <cfontana@suse.de>

After the initial split of CpusAccel into 3 TCG variants (mttcg, rr
and icount), we proceed to also split tcg_start_vcpu_thread.

We actually split it in 2 this time (mttcg_start_vcpu_thread and
rr_start_vcpu_thread), since the icount variant just uses the
round-robin function.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Claudio Fontana <cfontana@suse.de>
Message-Id: <20201015143217.29337-3-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-cpus-mttcg.h  | 21 --------------
 accel/tcg/tcg-cpus-rr.h     |  3 +-
 accel/tcg/tcg-cpus.h        |  1 -
 accel/tcg/tcg-all.c         |  5 ++++
 accel/tcg/tcg-cpus-icount.c |  2 +-
 accel/tcg/tcg-cpus-mttcg.c  | 29 +++++++++++++++++--
 accel/tcg/tcg-cpus-rr.c     | 39 +++++++++++++++++++++++--
 accel/tcg/tcg-cpus.c        | 58 -------------------------------------
 8 files changed, 71 insertions(+), 87 deletions(-)
 delete mode 100644 accel/tcg/tcg-cpus-mttcg.h

diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
deleted file mode 100644
index XXXXXXX..XXXXXXX
--- a/accel/tcg/tcg-cpus-mttcg.h
+++ /dev/null
@@ -XXX,XX +XXX,XX @@
-/*
- * QEMU TCG Multi Threaded vCPUs implementation
- *
- * Copyright 2020 SUSE LLC
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef TCG_CPUS_MTTCG_H
-#define TCG_CPUS_MTTCG_H
-
-/*
- * In the multi-threaded case each vCPU has its own thread. The TLS
- * variable current_cpu can be used deep in the code to find the
- * current CPUState for a given thread.
- */
-
-void *tcg_cpu_thread_fn(void *arg);
-
-#endif /* TCG_CPUS_MTTCG_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.h
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
 /* Kick all RR vCPUs. */
 void qemu_cpu_kick_rr_cpus(CPUState *unused);
 
-void *tcg_rr_cpu_thread_fn(void *arg);
+/* start the round robin vcpu thread */
+void rr_start_vcpu_thread(CPUState *cpu);
 
 #endif /* TCG_CPUS_RR_H */
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
 extern const CpusAccel tcg_cpus_icount;
 extern const CpusAccel tcg_cpus_rr;
 
-void tcg_start_vcpu_thread(CPUState *cpu);
 void qemu_tcg_destroy_vcpu(CPUState *cpu);
 int tcg_cpu_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
     tcg_exec_init(s->tb_size * 1024 * 1024);
     mttcg_enabled = s->mttcg_enabled;
 
+    /*
+     * Initialize TCG regions
+     */
+    tcg_region_init();
+
     if (mttcg_enabled) {
         cpus_register_accel(&tcg_cpus_mttcg);
     } else if (icount_enabled()) {
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.c
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 }
 
 const CpusAccel tcg_cpus_icount = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = rr_start_vcpu_thread,
     .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 
     .handle_interrupt = icount_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-mttcg.c
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
-#include "tcg-cpus-mttcg.h"
 
 /*
  * In the multi-threaded case each vCPU has its own thread. The TLS
@@ -XXX,XX +XXX,XX @@
  * current CPUState for a given thread.
  */
 
-void *tcg_cpu_thread_fn(void *arg)
+static void *tcg_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void mttcg_kick_vcpu_thread(CPUState *cpu)
     cpu_exit(cpu);
 }
 
+static void mttcg_start_vcpu_thread(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+
+    g_assert(tcg_enabled());
+
+    parallel_cpus = (current_machine->smp.max_cpus > 1);
+
+    cpu->thread = g_malloc0(sizeof(QemuThread));
+    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+    qemu_cond_init(cpu->halt_cond);
+
+    /* create a thread per vCPU with TCG (MTTCG) */
+    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
+             cpu->cpu_index);
+
+    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
+                       cpu, QEMU_THREAD_JOINABLE);
+
+#ifdef _WIN32
+    cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+}
+
 const CpusAccel tcg_cpus_mttcg = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = mttcg_start_vcpu_thread,
     .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 
     .handle_interrupt = tcg_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.c
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
  * elsewhere.
  */
 
-void *tcg_rr_cpu_thread_fn(void *arg)
+static void *tcg_rr_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ void *tcg_rr_cpu_thread_fn(void *arg)
     return NULL;
 }
 
+void rr_start_vcpu_thread(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+    static QemuCond *single_tcg_halt_cond;
+    static QemuThread *single_tcg_cpu_thread;
+
+    g_assert(tcg_enabled());
+    parallel_cpus = false;
+
+    if (!single_tcg_cpu_thread) {
+        cpu->thread = g_malloc0(sizeof(QemuThread));
+        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+        qemu_cond_init(cpu->halt_cond);
+
+        /* share a single thread for all cpus with TCG */
+        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
+        qemu_thread_create(cpu->thread, thread_name,
+                           tcg_rr_cpu_thread_fn,
+                           cpu, QEMU_THREAD_JOINABLE);
+
+        single_tcg_halt_cond = cpu->halt_cond;
+        single_tcg_cpu_thread = cpu->thread;
+#ifdef _WIN32
+        cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+    } else {
+        /* we share the thread */
+        cpu->thread = single_tcg_cpu_thread;
+        cpu->halt_cond = single_tcg_halt_cond;
+        cpu->thread_id = first_cpu->thread_id;
+        cpu->can_do_io = 1;
+        cpu->created = true;
+    }
+}
+
 const CpusAccel tcg_cpus_rr = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = rr_start_vcpu_thread,
     .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 
     .handle_interrupt = tcg_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
-#include "tcg-cpus-mttcg.h"
-#include "tcg-cpus-rr.h"
 
 /* common functionality among all TCG variants */
 
-void tcg_start_vcpu_thread(CPUState *cpu)
-{
-    char thread_name[VCPU_THREAD_NAME_SIZE];
-    static QemuCond *single_tcg_halt_cond;
-    static QemuThread *single_tcg_cpu_thread;
-    static int tcg_region_inited;
-
-    assert(tcg_enabled());
-    /*
-     * Initialize TCG regions--once. Now is a good time, because:
-     * (1) TCG's init context, prologue and target globals have been set up.
-     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
-     *     -accel flag is processed, so the check doesn't work then).
-     */
-    if (!tcg_region_inited) {
-        tcg_region_inited = 1;
-        tcg_region_init();
-        parallel_cpus = qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1;
-    }
-
-    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
-        cpu->thread = g_malloc0(sizeof(QemuThread));
-        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
-        qemu_cond_init(cpu->halt_cond);
-
-        if (qemu_tcg_mttcg_enabled()) {
-            /* create a thread per vCPU with TCG (MTTCG) */
-            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
-                 cpu->cpu_index);
-
-            qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
-                               cpu, QEMU_THREAD_JOINABLE);
-
-        } else {
-            /* share a single thread for all cpus with TCG */
-            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
-            qemu_thread_create(cpu->thread, thread_name,
-                               tcg_rr_cpu_thread_fn,
-                               cpu, QEMU_THREAD_JOINABLE);
-
-            single_tcg_halt_cond = cpu->halt_cond;
-            single_tcg_cpu_thread = cpu->thread;
-        }
-#ifdef _WIN32
-        cpu->hThread = qemu_thread_get_handle(cpu->thread);
-#endif
-    } else {
-        /* For non-MTTCG cases we share the thread */
-        cpu->thread = single_tcg_cpu_thread;
-        cpu->halt_cond = single_tcg_halt_cond;
-        cpu->thread_id = first_cpu->thread_id;
-        cpu->can_do_io = 1;
-        cpu->created = true;
-    }
-}
-
 void qemu_tcg_destroy_vcpu(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
-- 
2.25.1

From: Claudio Fontana <cfontana@suse.de>

Signed-off-by: Claudio Fontana <cfontana@suse.de>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20201015143217.29337-4-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-cpus-icount.h |  6 +--
 accel/tcg/tcg-cpus-rr.h     |  2 +-
 accel/tcg/tcg-cpus.h        |  6 +--
 accel/tcg/tcg-cpus-icount.c | 24 ++++++------
 accel/tcg/tcg-cpus-mttcg.c  | 10 ++---
 accel/tcg/tcg-cpus-rr.c     | 74 ++++++++++++++++++-------------------
 accel/tcg/tcg-cpus.c        |  6 +--
 7 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.h
+++ b/accel/tcg/tcg-cpus-icount.h
@@ -XXX,XX +XXX,XX @@
 #ifndef TCG_CPUS_ICOUNT_H
 #define TCG_CPUS_ICOUNT_H
 
-void handle_icount_deadline(void);
-void prepare_icount_for_run(CPUState *cpu);
-void process_icount_data(CPUState *cpu);
+void icount_handle_deadline(void);
+void icount_prepare_for_run(CPUState *cpu);
+void icount_process_data(CPUState *cpu);
 
 #endif /* TCG_CPUS_ICOUNT_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.h
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 
 /* Kick all RR vCPUs. */
-void qemu_cpu_kick_rr_cpus(CPUState *unused);
+void rr_kick_vcpu_thread(CPUState *unused);
 
 /* start the round robin vcpu thread */
 void rr_start_vcpu_thread(CPUState *cpu);
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
 extern const CpusAccel tcg_cpus_icount;
 extern const CpusAccel tcg_cpus_rr;
 
-void qemu_tcg_destroy_vcpu(CPUState *cpu);
-int tcg_cpu_exec(CPUState *cpu);
-void tcg_handle_interrupt(CPUState *cpu, int mask);
+void tcg_cpus_destroy(CPUState *cpu);
+int tcg_cpus_exec(CPUState *cpu);
+void tcg_cpus_handle_interrupt(CPUState *cpu, int mask);
 
 #endif /* TCG_CPUS_H */
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.c
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg-cpus-icount.h"
 #include "tcg-cpus-rr.h"
 
-static int64_t tcg_get_icount_limit(void)
+static int64_t icount_get_limit(void)
 {
     int64_t deadline;
 
@@ -XXX,XX +XXX,XX @@ static int64_t tcg_get_icount_limit(void)
     }
 }
 
-static void notify_aio_contexts(void)
+static void icount_notify_aio_contexts(void)
 {
     /* Wake up other AioContexts.  */
     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
     qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 }
 
-void handle_icount_deadline(void)
+void icount_handle_deadline(void)
 {
     assert(qemu_in_vcpu_thread());
     int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                   QEMU_TIMER_ATTR_ALL);
 
     if (deadline == 0) {
-        notify_aio_contexts();
+        icount_notify_aio_contexts();
     }
 }
 
-void prepare_icount_for_run(CPUState *cpu)
+void icount_prepare_for_run(CPUState *cpu)
 {
     int insns_left;
 
     /*
-     * These should always be cleared by process_icount_data after
+     * These should always be cleared by icount_process_data after
      * each vCPU execution. However u16.high can be raised
-     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
+     * asynchronously by cpu_exit/cpu_interrupt/tcg_cpus_handle_interrupt
      */
     g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
     g_assert(cpu->icount_extra == 0);
 
-    cpu->icount_budget = tcg_get_icount_limit();
+    cpu->icount_budget = icount_get_limit();
     insns_left = MIN(0xffff, cpu->icount_budget);
     cpu_neg(cpu)->icount_decr.u16.low = insns_left;
     cpu->icount_extra = cpu->icount_budget - insns_left;
@@ -XXX,XX +XXX,XX @@ void prepare_icount_for_run(CPUState *cpu)
     replay_mutex_lock();
 
     if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
-        notify_aio_contexts();
+        icount_notify_aio_contexts();
     }
 }
 
-void process_icount_data(CPUState *cpu)
+void icount_process_data(CPUState *cpu)
 {
     /* Account for executed instructions */
     icount_update(cpu);
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 {
     int old_mask = cpu->interrupt_request;
 
-    tcg_handle_interrupt(cpu, mask);
+    tcg_cpus_handle_interrupt(cpu, mask);
     if (qemu_cpu_is_self(cpu) &&
         !cpu->can_do_io
         && (mask & ~old_mask) != 0) {
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 
 const CpusAccel tcg_cpus_icount = {
     .create_vcpu_thread = rr_start_vcpu_thread,
-    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+    .kick_vcpu_thread = rr_kick_vcpu_thread,
 
     .handle_interrupt = icount_handle_interrupt,
     .get_virtual_clock = icount_get,
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-mttcg.c
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
  * current CPUState for a given thread.
  */
 
-static void *tcg_cpu_thread_fn(void *arg)
+static void *mttcg_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
         if (cpu_can_run(cpu)) {
             int r;
             qemu_mutex_unlock_iothread();
-            r = tcg_cpu_exec(cpu);
+            r = tcg_cpus_exec(cpu);
             qemu_mutex_lock_iothread();
             switch (r) {
             case EXCP_DEBUG:
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
         qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
-    qemu_tcg_destroy_vcpu(cpu);
+    tcg_cpus_destroy(cpu);
     qemu_mutex_unlock_iothread();
     rcu_unregister_thread();
     return NULL;
@@ -XXX,XX +XXX,XX @@ static void mttcg_start_vcpu_thread(CPUState *cpu)
     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
              cpu->cpu_index);
 
-    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
+    qemu_thread_create(cpu->thread, thread_name, mttcg_cpu_thread_fn,
                        cpu, QEMU_THREAD_JOINABLE);
 
 #ifdef _WIN32
@@ -XXX,XX +XXX,XX @@ const CpusAccel tcg_cpus_mttcg = {
     .create_vcpu_thread = mttcg_start_vcpu_thread,
     .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 
-    .handle_interrupt = tcg_handle_interrupt,
+    .handle_interrupt = tcg_cpus_handle_interrupt,
 };
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.c
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg-cpus-icount.h"
 
 /* Kick all RR vCPUs */
-void qemu_cpu_kick_rr_cpus(CPUState *unused)
+void rr_kick_vcpu_thread(CPUState *unused)
 {
     CPUState *cpu;
 
@@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick_rr_cpus(CPUState *unused)
  * idleness is complete.
  */
 
-static QEMUTimer *tcg_kick_vcpu_timer;
-static CPUState *tcg_current_rr_cpu;
+static QEMUTimer *rr_kick_vcpu_timer;
+static CPUState *rr_current_cpu;
 
 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 
-static inline int64_t qemu_tcg_next_kick(void)
+static inline int64_t rr_next_kick_time(void)
 {
     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 }
 
 /* Kick the currently round-robin scheduled vCPU to next */
-static void qemu_cpu_kick_rr_next_cpu(void)
+static void rr_kick_next_cpu(void)
 {
     CPUState *cpu;
     do {
-        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
+        cpu = qatomic_mb_read(&rr_current_cpu);
         if (cpu) {
             cpu_exit(cpu);
         }
-    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
+    } while (cpu != qatomic_mb_read(&rr_current_cpu));
 }
 
-static void kick_tcg_thread(void *opaque)
+static void rr_kick_thread(void *opaque)
 {
-    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
-    qemu_cpu_kick_rr_next_cpu();
+    timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
+    rr_kick_next_cpu();
 }
 
-static void start_tcg_kick_timer(void)
+static void rr_start_kick_timer(void)
 {
-    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
-        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                           kick_tcg_thread, NULL);
+    if (!rr_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
+        rr_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                           rr_kick_thread, NULL);
     }
-    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
-        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+    if (rr_kick_vcpu_timer && !timer_pending(rr_kick_vcpu_timer)) {
+        timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
     }
 }
 
-static void stop_tcg_kick_timer(void)
+static void rr_stop_kick_timer(void)
 {
-    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
-        timer_del(tcg_kick_vcpu_timer);
+    if (rr_kick_vcpu_timer && timer_pending(rr_kick_vcpu_timer)) {
+        timer_del(rr_kick_vcpu_timer);
     }
 }
 
-static void qemu_tcg_rr_wait_io_event(void)
+static void rr_wait_io_event(void)
 {
     CPUState *cpu;
 
     while (all_cpu_threads_idle()) {
-        stop_tcg_kick_timer();
+        rr_stop_kick_timer();
         qemu_cond_wait_iothread(first_cpu->halt_cond);
     }
 
-    start_tcg_kick_timer();
+    rr_start_kick_timer();
 
     CPU_FOREACH(cpu) {
         qemu_wait_io_event_common(cpu);
@@ -XXX,XX +XXX,XX @@ static void qemu_tcg_rr_wait_io_event(void)
  * Destroy any remaining vCPUs which have been unplugged and have
  * finished running
  */
-static void deal_with_unplugged_cpus(void)
+static void rr_deal_with_unplugged_cpus(void)
 {
     CPUState *cpu;
 
     CPU_FOREACH(cpu) {
         if (cpu->unplug && !cpu_can_run(cpu)) {
-            qemu_tcg_destroy_vcpu(cpu);
+            tcg_cpus_destroy(cpu);
             break;
         }
     }
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
  * elsewhere.
  */
 
-static void *tcg_rr_cpu_thread_fn(void *arg)
+static void *rr_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
         }
     }
 
-    start_tcg_kick_timer();
+    rr_start_kick_timer();
 
     cpu = first_cpu;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
              * Run the timers here.  This is much more efficient than
              * waking up the I/O thread and waiting for completion.
              */
-            handle_icount_deadline();
+            icount_handle_deadline();
         }
 
         replay_mutex_unlock();
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
 
         while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
 
-            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
+            qatomic_mb_set(&rr_current_cpu, cpu);
             current_cpu = cpu;
 
             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
 
                 qemu_mutex_unlock_iothread();
                 if (icount_enabled()) {
-                    prepare_icount_for_run(cpu);
+                    icount_prepare_for_run(cpu);
                 }
-                r = tcg_cpu_exec(cpu);
+                r = tcg_cpus_exec(cpu);
                 if (icount_enabled()) {
-                    process_icount_data(cpu);
+                    icount_process_data(cpu);
                 }
                 qemu_mutex_lock_iothread();
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
         } /* while (cpu && !cpu->exit_request).. */
 
         /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
-        qatomic_set(&tcg_current_rr_cpu, NULL);
+        qatomic_set(&rr_current_cpu, NULL);
 
         if (cpu && cpu->exit_request) {
             qatomic_mb_set(&cpu->exit_request, 0);
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
             qemu_notify_event();
         }
 
-        qemu_tcg_rr_wait_io_event();
-        deal_with_unplugged_cpus();
+        rr_wait_io_event();
+        rr_deal_with_unplugged_cpus();
     }
 
     rcu_unregister_thread();
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
         /* share a single thread for all cpus with TCG */
         snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
         qemu_thread_create(cpu->thread, thread_name,
-                           tcg_rr_cpu_thread_fn,
+                           rr_cpu_thread_fn,
                            cpu, QEMU_THREAD_JOINABLE);
 
         single_tcg_halt_cond = cpu->halt_cond;
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
 
 const CpusAccel tcg_cpus_rr = {
     .create_vcpu_thread = rr_start_vcpu_thread,
-    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+    .kick_vcpu_thread = rr_kick_vcpu_thread,
 
-    .handle_interrupt = tcg_handle_interrupt,
+    .handle_interrupt = tcg_cpus_handle_interrupt,
 };
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
 
 /* common functionality among all TCG variants */
 
-void qemu_tcg_destroy_vcpu(CPUState *cpu)
+void tcg_cpus_destroy(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
 }
 
-int tcg_cpu_exec(CPUState *cpu)
+int tcg_cpus_exec(CPUState *cpu)
 {
     int ret;
 #ifdef CONFIG_PROFILER
@@ -XXX,XX +XXX,XX @@ int tcg_cpu_exec(CPUState *cpu)
 }
 
 /* mask must never be zero, except for A20 change call */
-void tcg_handle_interrupt(CPUState *cpu, int mask)
+void tcg_cpus_handle_interrupt(CPUState *cpu, int mask)
 {
     g_assert(qemu_mutex_iothread_locked());
 
-- 
2.25.1