Series comparison

-[PULL 00/16] tcg patch queue
+[PULL 0/3] tcg patch queue
-The following changes since commit 3e08b2b9cb64bff2b73fa9128c0e49bfcde0dd40:
+The following changes since commit 2ecfc0657afa5d29a373271b342f704a1a3c6737:
-  Merge remote-tracking branch 'remotes/philmd-gitlab/tags/edk2-next-20200121' into staging (2020-01-21 15:29:25 +0000)
+  Merge remote-tracking branch 'remotes/armbru/tags/pull-misc-2020-12-10' into staging (2020-12-10 17:01:05 +0000)
 are available in the Git repository at:
-  https://github.com/rth7680/qemu.git tags/pull-tcg-20200121
+  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20201210
-for you to fetch changes up to 75fa376cdab5e5db2c7fdd107358e16f95503ac6:
+for you to fetch changes up to 9e2658d62ebc23efe7df43fc0e306f129510d874:
-  scripts/git.orderfile: Display decodetree before C source (2020-01-21 15:26:09 -1000)
+  accel/tcg: rename tcg-cpus functions to match module name (2020-12-10 17:44:10 -0600)
 ----------------------------------------------------------------
-Remove another limit to NB_MMU_MODES.
+Split CpusAccel for tcg variants
 Fix compilation using uclibc.
 Fix defaulting of -accel parameters.
 Tidy cputlb basic routines.
 Adjust git.orderfile for decodetree.
 ----------------------------------------------------------------
-Carlos Santos (1):
+Claudio Fontana (3):
-      util/cacheinfo: fix crash when compiling with uClibc
+      accel/tcg: split CpusAccel into three TCG variants
       accel/tcg: split tcg_start_vcpu_thread
       accel/tcg: rename tcg-cpus functions to match module name
-Philippe Mathieu-Daudé (1):
+ accel/tcg/tcg-cpus-icount.h |  17 ++
-      scripts/git.orderfile: Display decodetree before C source
+ accel/tcg/tcg-cpus-rr.h     |  21 ++
  accel/tcg/tcg-cpus.h        |  12 +-
  accel/tcg/tcg-all.c         |  13 +-
  accel/tcg/tcg-cpus-icount.c | 147 +++++++++++++
  accel/tcg/tcg-cpus-mttcg.c  | 140 ++++++++++++
  accel/tcg/tcg-cpus-rr.c     | 305 ++++++++++++++++++++++++++
  accel/tcg/tcg-cpus.c        | 506 +-------------------------------------------
  softmmu/icount.c            |   2 +-
  accel/tcg/meson.build       |   9 +-
 files changed, 670 insertions(+), 502 deletions(-)
  create mode 100644 accel/tcg/tcg-cpus-icount.h
  create mode 100644 accel/tcg/tcg-cpus-rr.h
  create mode 100644 accel/tcg/tcg-cpus-icount.c
  create mode 100644 accel/tcg/tcg-cpus-mttcg.c
  create mode 100644 accel/tcg/tcg-cpus-rr.c
-Richard Henderson (14):
-      cputlb: Handle NB_MMU_MODES > TARGET_PAGE_BITS_MIN
-      vl: Remove unused variable in configure_accelerators
-      vl: Reduce scope of variables in configure_accelerators
-      vl: Remove useless test in configure_accelerators
-      vl: Only choose enabled accelerators in configure_accelerators
-      cputlb: Merge tlb_table_flush_by_mmuidx into tlb_flush_one_mmuidx_locked
-      cputlb: Make tlb_n_entries private to cputlb.c
-      cputlb: Pass CPUTLBDescFast to tlb_n_entries and sizeof_tlb
-      cputlb: Hoist tlb portions in tlb_mmu_resize_locked
-      cputlb: Hoist tlb portions in tlb_flush_one_mmuidx_locked
-      cputlb: Split out tlb_mmu_flush_locked
-      cputlb: Partially merge tlb_dyn_init into tlb_init
-      cputlb: Initialize tlbs as flushed
-      cputlb: Hoist timestamp outside of loops over tlbs
- include/exec/cpu_ldst.h |   5 -
- accel/tcg/cputlb.c      | 287 +++++++++++++++++++++++++++++++++---------------
- util/cacheinfo.c        |  10 +-
- vl.c                    |  27 +++--
- scripts/git.orderfile   |   3 +
-files changed, 223 insertions(+), 109 deletions(-)

-[PULL 07/16] cputlb: Merge tlb_table_flush_by_mmuidx into tlb_flush_one_mmuidx_locked
+[PULL 1/3] accel/tcg: split CpusAccel into three TCG variants
-There is only one caller for tlb_table_flush_by_mmuidx.  Place
+From: Claudio Fontana <cfontana@suse.de>
 the result at the earlier line number, due to an expected user
 in the near future.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+split up the CpusAccel tcg_cpus into three TCG variants:
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
 tcg_cpus_rr (single threaded, round robin cpus)
 tcg_cpus_icount (same as rr, but with instruction counting enabled)
 tcg_cpus_mttcg (multi-threaded cpus)
 Suggested-by: Richard Henderson <richard.henderson@linaro.org>
 Signed-off-by: Claudio Fontana <cfontana@suse.de>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 Message-Id: <20201015143217.29337-2-cfontana@suse.de>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- accel/tcg/cputlb.c | 19 +++++++------------
+ accel/tcg/tcg-cpus-icount.h |  17 ++
-file changed, 7 insertions(+), 12 deletions(-)
+ accel/tcg/tcg-cpus-mttcg.h  |  21 ++
  accel/tcg/tcg-cpus-rr.h     |  20 ++
  accel/tcg/tcg-cpus.h        |  13 +-
  accel/tcg/tcg-all.c         |   8 +-
  accel/tcg/tcg-cpus-icount.c | 147 +++++++++++
  accel/tcg/tcg-cpus-mttcg.c  | 117 +++++++++
  accel/tcg/tcg-cpus-rr.c     | 270 ++++++++++++++++++++
  accel/tcg/tcg-cpus.c        | 484 ++----------------------------------
  softmmu/icount.c            |   2 +-
  accel/tcg/meson.build       |   9 +-
 files changed, 646 insertions(+), 462 deletions(-)
  create mode 100644 accel/tcg/tcg-cpus-icount.h
  create mode 100644 accel/tcg/tcg-cpus-mttcg.h
  create mode 100644 accel/tcg/tcg-cpus-rr.h
  create mode 100644 accel/tcg/tcg-cpus-icount.c
  create mode 100644 accel/tcg/tcg-cpus-mttcg.c
  create mode 100644 accel/tcg/tcg-cpus-rr.c
-diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
+diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-icount.h
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Single Threaded vCPUs implementation using instruction counting
 + *
 + * Copyright 2020 SUSE LLC
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + */
 +
 +#ifndef TCG_CPUS_ICOUNT_H
 +#define TCG_CPUS_ICOUNT_H
 +
 +void handle_icount_deadline(void);
 +void prepare_icount_for_run(CPUState *cpu);
 +void process_icount_data(CPUState *cpu);
 +
 +#endif /* TCG_CPUS_ICOUNT_H */
 diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-mttcg.h
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Multi Threaded vCPUs implementation
 + *
 + * Copyright 2020 SUSE LLC
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + */
 +
 +#ifndef TCG_CPUS_MTTCG_H
 +#define TCG_CPUS_MTTCG_H
 +
 +/*
 + * In the multi-threaded case each vCPU has its own thread. The TLS
 + * variable current_cpu can be used deep in the code to find the
 + * current CPUState for a given thread.
 + */
 +
 +void *tcg_cpu_thread_fn(void *arg);
 +
 +#endif /* TCG_CPUS_MTTCG_H */
 diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Single Threaded vCPUs implementation
 + *
 + * Copyright 2020 SUSE LLC
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + */
 +
 +#ifndef TCG_CPUS_RR_H
 +#define TCG_CPUS_RR_H
 +
 +#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 +
 +/* Kick all RR vCPUs. */
 +void qemu_cpu_kick_rr_cpus(CPUState *unused);
 +
 +void *tcg_rr_cpu_thread_fn(void *arg);
 +
 +#endif /* TCG_CPUS_RR_H */
 diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
 index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/cputlb.c
+--- a/accel/tcg/tcg-cpus.h
-+++ b/accel/tcg/cputlb.c
++++ b/accel/tcg/tcg-cpus.h
-@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
+@@ -XXX,XX +XXX,XX @@
  /*
 - * Accelerator CPUS Interface
 + * QEMU TCG vCPU common functionality
 + *
 + * Functionality common to all TCG vcpu variants: mttcg, rr and icount.
   *
   * Copyright 2020 SUSE LLC
   *
@@ -XXX,XX +XXX,XX @@
  #include "sysemu/cpus.h"
 -extern const CpusAccel tcg_cpus;
 +extern const CpusAccel tcg_cpus_mttcg;
 +extern const CpusAccel tcg_cpus_icount;
 +extern const CpusAccel tcg_cpus_rr;
 +
 +void tcg_start_vcpu_thread(CPUState *cpu);
 +void qemu_tcg_destroy_vcpu(CPUState *cpu);
 +int tcg_cpu_exec(CPUState *cpu);
 +void tcg_handle_interrupt(CPUState *cpu, int mask);
  #endif /* TCG_CPUS_H */
 diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-all.c
 +++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
      tcg_exec_init(s->tb_size * 1024 * 1024);
      mttcg_enabled = s->mttcg_enabled;
 -    cpus_register_accel(&tcg_cpus);
 +    if (mttcg_enabled) {
 +        cpus_register_accel(&tcg_cpus_mttcg);
 +    } else if (icount_enabled()) {
 +        cpus_register_accel(&tcg_cpus_icount);
 +    } else {
 +        cpus_register_accel(&tcg_cpus_rr);
 +    }
      return 0;
  }
 diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Single Threaded vCPUs implementation using instruction counting
 + *
 + * Copyright (c) 2003-2008 Fabrice Bellard
 + * Copyright (c) 2014 Red Hat Inc.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a copy
 + * of this software and associated documentation files (the "Software"), to deal
 + * in the Software without restriction, including without limitation the rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included in
 + * all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 + * THE SOFTWARE.
 + */
 +
 +#include "qemu/osdep.h"
 +#include "qemu-common.h"
 +#include "sysemu/tcg.h"
 +#include "sysemu/replay.h"
 +#include "qemu/main-loop.h"
 +#include "qemu/guest-random.h"
 +#include "exec/exec-all.h"
 +#include "hw/boards.h"
 +
 +#include "tcg-cpus.h"
 +#include "tcg-cpus-icount.h"
 +#include "tcg-cpus-rr.h"
 +
 +static int64_t tcg_get_icount_limit(void)
 +{
 +    int64_t deadline;
 +
 +    if (replay_mode != REPLAY_MODE_PLAY) {
 +        /*
 +         * Include all the timers, because they may need attention.
 +         * Too long CPU execution may create unnecessary delay in UI.
 +         */
 +        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
 +                                              QEMU_TIMER_ATTR_ALL);
 +        /* Check realtime timers, because they help with input processing */
 +        deadline = qemu_soonest_timeout(deadline,
 +                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
 +                                           QEMU_TIMER_ATTR_ALL));
 +
 +        /*
 +         * Maintain prior (possibly buggy) behaviour where if no deadline
 +         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
 +         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
 +         * nanoseconds.
 +         */
 +        if ((deadline < 0) || (deadline > INT32_MAX)) {
 +            deadline = INT32_MAX;
 +        }
 +
 +        return icount_round(deadline);
 +    } else {
 +        return replay_get_instructions();
 +    }
 +}
 +
 +static void notify_aio_contexts(void)
 +{
 +    /* Wake up other AioContexts.  */
 +    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 +    qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 +}
 +
 +void handle_icount_deadline(void)
 +{
 +    assert(qemu_in_vcpu_thread());
 +    int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
 +                                                  QEMU_TIMER_ATTR_ALL);
 +
 +    if (deadline == 0) {
 +        notify_aio_contexts();
 +    }
 +}
 +
 +void prepare_icount_for_run(CPUState *cpu)
 +{
 +    int insns_left;
 +
 +    /*
 +     * These should always be cleared by process_icount_data after
 +     * each vCPU execution. However u16.high can be raised
 +     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
 +     */
 +    g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
 +    g_assert(cpu->icount_extra == 0);
 +
 +    cpu->icount_budget = tcg_get_icount_limit();
 +    insns_left = MIN(0xffff, cpu->icount_budget);
 +    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
 +    cpu->icount_extra = cpu->icount_budget - insns_left;
 +
 +    replay_mutex_lock();
 +
 +    if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
 +        notify_aio_contexts();
 +    }
 +}
 +
 +void process_icount_data(CPUState *cpu)
 +{
 +    /* Account for executed instructions */
 +    icount_update(cpu);
 +
 +    /* Reset the counters */
 +    cpu_neg(cpu)->icount_decr.u16.low = 0;
 +    cpu->icount_extra = 0;
 +    cpu->icount_budget = 0;
 +
 +    replay_account_executed_instructions();
 +
 +    replay_mutex_unlock();
 +}
 +
 +static void icount_handle_interrupt(CPUState *cpu, int mask)
 +{
 +    int old_mask = cpu->interrupt_request;
 +
 +    tcg_handle_interrupt(cpu, mask);
 +    if (qemu_cpu_is_self(cpu) &&
 +        !cpu->can_do_io
 +        && (mask & ~old_mask) != 0) {
 +        cpu_abort(cpu, "Raised interrupt while not in I/O function");
 +    }
 +}
 +
 +const CpusAccel tcg_cpus_icount = {
 +    .create_vcpu_thread = tcg_start_vcpu_thread,
 +    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 +
 +    .handle_interrupt = icount_handle_interrupt,
 +    .get_virtual_clock = icount_get,
 +    .get_elapsed_ticks = icount_get,
 +};
 diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Multi Threaded vCPUs implementation
 + *
 + * Copyright (c) 2003-2008 Fabrice Bellard
 + * Copyright (c) 2014 Red Hat Inc.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a copy
 + * of this software and associated documentation files (the "Software"), to deal
 + * in the Software without restriction, including without limitation the rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included in
 + * all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 + * THE SOFTWARE.
 + */
 +
 +#include "qemu/osdep.h"
 +#include "qemu-common.h"
 +#include "sysemu/tcg.h"
 +#include "sysemu/replay.h"
 +#include "qemu/main-loop.h"
 +#include "qemu/guest-random.h"
 +#include "exec/exec-all.h"
 +#include "hw/boards.h"
 +
 +#include "tcg-cpus.h"
 +#include "tcg-cpus-mttcg.h"
 +
 +/*
 + * In the multi-threaded case each vCPU has its own thread. The TLS
 + * variable current_cpu can be used deep in the code to find the
 + * current CPUState for a given thread.
 + */
 +
 +void *tcg_cpu_thread_fn(void *arg)
 +{
 +    CPUState *cpu = arg;
 +
 +    assert(tcg_enabled());
 +    g_assert(!icount_enabled());
 +
 +    rcu_register_thread();
 +    tcg_register_thread();
 +
 +    qemu_mutex_lock_iothread();
 +    qemu_thread_get_self(cpu->thread);
 +
 +    cpu->thread_id = qemu_get_thread_id();
 +    cpu->can_do_io = 1;
 +    current_cpu = cpu;
 +    cpu_thread_signal_created(cpu);
 +    qemu_guest_random_seed_thread_part2(cpu->random_seed);
 +
 +    /* process any pending work */
 +    cpu->exit_request = 1;
 +
 +    do {
 +        if (cpu_can_run(cpu)) {
 +            int r;
 +            qemu_mutex_unlock_iothread();
 +            r = tcg_cpu_exec(cpu);
 +            qemu_mutex_lock_iothread();
 +            switch (r) {
 +            case EXCP_DEBUG:
 +                cpu_handle_guest_debug(cpu);
 +                break;
 +            case EXCP_HALTED:
 +                /*
 +                 * during start-up the vCPU is reset and the thread is
 +                 * kicked several times. If we don't ensure we go back
 +                 * to sleep in the halted state we won't cleanly
 +                 * start-up when the vCPU is enabled.
 +                 *
 +                 * cpu->halted should ensure we sleep in wait_io_event
 +                 */
 +                g_assert(cpu->halted);
 +                break;
 +            case EXCP_ATOMIC:
 +                qemu_mutex_unlock_iothread();
 +                cpu_exec_step_atomic(cpu);
 +                qemu_mutex_lock_iothread();
 +            default:
 +                /* Ignore everything else? */
 +                break;
 +            }
 +        }
 +
 +        qatomic_mb_set(&cpu->exit_request, 0);
 +        qemu_wait_io_event(cpu);
 +    } while (!cpu->unplug || cpu_can_run(cpu));
 +
 +    qemu_tcg_destroy_vcpu(cpu);
 +    qemu_mutex_unlock_iothread();
 +    rcu_unregister_thread();
 +    return NULL;
 +}
 +
 +static void mttcg_kick_vcpu_thread(CPUState *cpu)
 +{
 +    cpu_exit(cpu);
 +}
 +
 +const CpusAccel tcg_cpus_mttcg = {
 +    .create_vcpu_thread = tcg_start_vcpu_thread,
 +    .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 +
 +    .handle_interrupt = tcg_handle_interrupt,
 +};
 diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
 +/*
 + * QEMU TCG Single Threaded vCPUs implementation
 + *
 + * Copyright (c) 2003-2008 Fabrice Bellard
 + * Copyright (c) 2014 Red Hat Inc.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a copy
 + * of this software and associated documentation files (the "Software"), to deal
 + * in the Software without restriction, including without limitation the rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included in
 + * all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 + * THE SOFTWARE.
 + */
 +
 +#include "qemu/osdep.h"
 +#include "qemu-common.h"
 +#include "sysemu/tcg.h"
 +#include "sysemu/replay.h"
 +#include "qemu/main-loop.h"
 +#include "qemu/guest-random.h"
 +#include "exec/exec-all.h"
 +#include "hw/boards.h"
 +
 +#include "tcg-cpus.h"
 +#include "tcg-cpus-rr.h"
 +#include "tcg-cpus-icount.h"
 +
 +/* Kick all RR vCPUs */
 +void qemu_cpu_kick_rr_cpus(CPUState *unused)
 +{
 +    CPUState *cpu;
 +
 +    CPU_FOREACH(cpu) {
 +        cpu_exit(cpu);
 +    };
 +}
 +
 +/*
 + * TCG vCPU kick timer
 + *
 + * The kick timer is responsible for moving single threaded vCPU
 + * emulation on to the next vCPU. If more than one vCPU is running a
 + * timer event will force a cpu->exit so the next vCPU can get
 + * scheduled.
 + *
 + * The timer is removed if all vCPUs are idle and restarted again once
 + * idleness is complete.
 + */
 +
 +static QEMUTimer *tcg_kick_vcpu_timer;
 +static CPUState *tcg_current_rr_cpu;
 +
 +#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 +
 +static inline int64_t qemu_tcg_next_kick(void)
 +{
 +    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 +}
 +
 +/* Kick the currently round-robin scheduled vCPU to next */
 +static void qemu_cpu_kick_rr_next_cpu(void)
 +{
 +    CPUState *cpu;
 +    do {
 +        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
 +        if (cpu) {
 +            cpu_exit(cpu);
 +        }
 +    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
 +}
 +
 +static void kick_tcg_thread(void *opaque)
 +{
 +    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 +    qemu_cpu_kick_rr_next_cpu();
 +}
 +
 +static void start_tcg_kick_timer(void)
 +{
 +    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 +        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 +                                           kick_tcg_thread, NULL);
 +    }
 +    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
 +        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 +    }
 +}
 +
 +static void stop_tcg_kick_timer(void)
 +{
 +    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
 +        timer_del(tcg_kick_vcpu_timer);
 +    }
 +}
 +
 +static void qemu_tcg_rr_wait_io_event(void)
 +{
 +    CPUState *cpu;
 +
 +    while (all_cpu_threads_idle()) {
 +        stop_tcg_kick_timer();
 +        qemu_cond_wait_iothread(first_cpu->halt_cond);
 +    }
 +
 +    start_tcg_kick_timer();
 +
 +    CPU_FOREACH(cpu) {
 +        qemu_wait_io_event_common(cpu);
 +    }
 +}
 +
 +/*
 + * Destroy any remaining vCPUs which have been unplugged and have
 + * finished running
 + */
 +static void deal_with_unplugged_cpus(void)
 +{
 +    CPUState *cpu;
 +
 +    CPU_FOREACH(cpu) {
 +        if (cpu->unplug && !cpu_can_run(cpu)) {
 +            qemu_tcg_destroy_vcpu(cpu);
 +            break;
 +        }
 +    }
 +}
 +
 +/*
 + * In the single-threaded case each vCPU is simulated in turn. If
 + * there is more than a single vCPU we create a simple timer to kick
 + * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 + * This is done explicitly rather than relying on side-effects
 + * elsewhere.
 + */
 +
 +void *tcg_rr_cpu_thread_fn(void *arg)
 +{
 +    CPUState *cpu = arg;
 +
 +    assert(tcg_enabled());
 +    rcu_register_thread();
 +    tcg_register_thread();
 +
 +    qemu_mutex_lock_iothread();
 +    qemu_thread_get_self(cpu->thread);
 +
 +    cpu->thread_id = qemu_get_thread_id();
 +    cpu->can_do_io = 1;
 +    cpu_thread_signal_created(cpu);
 +    qemu_guest_random_seed_thread_part2(cpu->random_seed);
 +
 +    /* wait for initial kick-off after machine start */
 +    while (first_cpu->stopped) {
 +        qemu_cond_wait_iothread(first_cpu->halt_cond);
 +
 +        /* process any pending work */
 +        CPU_FOREACH(cpu) {
 +            current_cpu = cpu;
 +            qemu_wait_io_event_common(cpu);
 +        }
 +    }
 +
 +    start_tcg_kick_timer();
 +
 +    cpu = first_cpu;
 +
 +    /* process any pending work */
 +    cpu->exit_request = 1;
 +
 +    while (1) {
 +        qemu_mutex_unlock_iothread();
 +        replay_mutex_lock();
 +        qemu_mutex_lock_iothread();
 +
 +        if (icount_enabled()) {
 +            /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
 +            icount_account_warp_timer();
 +            /*
 +             * Run the timers here.  This is much more efficient than
 +             * waking up the I/O thread and waiting for completion.
 +             */
 +            handle_icount_deadline();
 +        }
 +
 +        replay_mutex_unlock();
 +
 +        if (!cpu) {
 +            cpu = first_cpu;
 +        }
 +
 +        while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
 +
 +            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
 +            current_cpu = cpu;
 +
 +            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
 +                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
 +
 +            if (cpu_can_run(cpu)) {
 +                int r;
 +
 +                qemu_mutex_unlock_iothread();
 +                if (icount_enabled()) {
 +                    prepare_icount_for_run(cpu);
 +                }
 +                r = tcg_cpu_exec(cpu);
 +                if (icount_enabled()) {
 +                    process_icount_data(cpu);
 +                }
 +                qemu_mutex_lock_iothread();
 +
 +                if (r == EXCP_DEBUG) {
 +                    cpu_handle_guest_debug(cpu);
 +                    break;
 +                } else if (r == EXCP_ATOMIC) {
 +                    qemu_mutex_unlock_iothread();
 +                    cpu_exec_step_atomic(cpu);
 +                    qemu_mutex_lock_iothread();
 +                    break;
 +                }
 +            } else if (cpu->stop) {
 +                if (cpu->unplug) {
 +                    cpu = CPU_NEXT(cpu);
 +                }
 +                break;
 +            }
 +
 +            cpu = CPU_NEXT(cpu);
 +        } /* while (cpu && !cpu->exit_request).. */
 +
 +        /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
 +        qatomic_set(&tcg_current_rr_cpu, NULL);
 +
 +        if (cpu && cpu->exit_request) {
 +            qatomic_mb_set(&cpu->exit_request, 0);
 +        }
 +
 +        if (icount_enabled() && all_cpu_threads_idle()) {
 +            /*
 +             * When all cpus are sleeping (e.g. in WFI), to avoid a deadlock
 +             * in the main_loop, wake it up in order to start the warp timer.
 +             */
 +            qemu_notify_event();
 +        }
 +
 +        qemu_tcg_rr_wait_io_event();
 +        deal_with_unplugged_cpus();
 +    }
 +
 +    rcu_unregister_thread();
 +    return NULL;
 +}
 +
 +const CpusAccel tcg_cpus_rr = {
 +    .create_vcpu_thread = tcg_start_vcpu_thread,
 +    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 +
 +    .handle_interrupt = tcg_handle_interrupt,
 +};
 diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus.c
 +++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
  /*
 - * QEMU System Emulator
 + * QEMU TCG vCPU common functionality
 + *
 + * Functionality common to all TCG vCPU variants: mttcg, rr and icount.
   *
   * Copyright (c) 2003-2008 Fabrice Bellard
   * Copyright (c) 2014 Red Hat Inc.
@@ -XXX,XX +XXX,XX @@
  #include "hw/boards.h"
  #include "tcg-cpus.h"
 +#include "tcg-cpus-mttcg.h"
 +#include "tcg-cpus-rr.h"
 -/* Kick all RR vCPUs */
 -static void qemu_cpu_kick_rr_cpus(void)
 -{
 -    CPUState *cpu;
 +/* common functionality among all TCG variants */
 -    CPU_FOREACH(cpu) {
 -        cpu_exit(cpu);
 -    };
 -}
 -
 -static void tcg_kick_vcpu_thread(CPUState *cpu)
 -{
 -    if (qemu_tcg_mttcg_enabled()) {
 -        cpu_exit(cpu);
 -    } else {
 -        qemu_cpu_kick_rr_cpus();
 -    }
 -}
 -
 -/*
 - * TCG vCPU kick timer
 - *
 - * The kick timer is responsible for moving single threaded vCPU
 - * emulation on to the next vCPU. If more than one vCPU is running a
 - * timer event with force a cpu->exit so the next vCPU can get
 - * scheduled.
 - *
 - * The timer is removed if all vCPUs are idle and restarted again once
 - * idleness is complete.
 - */
 -
 -static QEMUTimer *tcg_kick_vcpu_timer;
 -static CPUState *tcg_current_rr_cpu;
 -
 -#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 -
 -static inline int64_t qemu_tcg_next_kick(void)
 -{
 -    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 -}
 -
 -/* Kick the currently round-robin scheduled vCPU to next */
 -static void qemu_cpu_kick_rr_next_cpu(void)
 -{
 -    CPUState *cpu;
 -    do {
 -        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
 -        if (cpu) {
 -            cpu_exit(cpu);
 -        }
 -    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
 -}
 -
 -static void kick_tcg_thread(void *opaque)
 -{
 -    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 -    qemu_cpu_kick_rr_next_cpu();
 -}
 -
 -static void start_tcg_kick_timer(void)
 -{
 -    assert(!mttcg_enabled);
 -    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 -        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 -                                           kick_tcg_thread, NULL);
 -    }
 -    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
 -        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 -    }
 -}
 -
 -static void stop_tcg_kick_timer(void)
 -{
 -    assert(!mttcg_enabled);
 -    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
 -        timer_del(tcg_kick_vcpu_timer);
 -    }
 -}
 -
 -static void qemu_tcg_destroy_vcpu(CPUState *cpu)
 -{
 -}
 -
 -static void qemu_tcg_rr_wait_io_event(void)
 -{
 -    CPUState *cpu;
 -
 -    while (all_cpu_threads_idle()) {
 -        stop_tcg_kick_timer();
 -        qemu_cond_wait_iothread(first_cpu->halt_cond);
 -    }
 -
 -    start_tcg_kick_timer();
 -
 -    CPU_FOREACH(cpu) {
 -        qemu_wait_io_event_common(cpu);
 -    }
 -}
 -
 -static int64_t tcg_get_icount_limit(void)
 -{
 -    int64_t deadline;
 -
 -    if (replay_mode != REPLAY_MODE_PLAY) {
 -        /*
 -         * Include all the timers, because they may need an attention.
 -         * Too long CPU execution may create unnecessary delay in UI.
 -         */
 -        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
 -                                              QEMU_TIMER_ATTR_ALL);
 -        /* Check realtime timers, because they help with input processing */
 -        deadline = qemu_soonest_timeout(deadline,
 -                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
 -                                           QEMU_TIMER_ATTR_ALL));
 -
 -        /*
 -         * Maintain prior (possibly buggy) behaviour where if no deadline
 -         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
 -         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
 -         * nanoseconds.
 -         */
 -        if ((deadline < 0) || (deadline > INT32_MAX)) {
 -            deadline = INT32_MAX;
 -        }
 -
 -        return icount_round(deadline);
 -    } else {
 -        return replay_get_instructions();
 -    }
 -}
 -
 -static void notify_aio_contexts(void)
 -{
 -    /* Wake up other AioContexts.  */
 -    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 -    qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 -}
 -
 -static void handle_icount_deadline(void)
 -{
 -    assert(qemu_in_vcpu_thread());
 -    if (icount_enabled()) {
 -        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
 -                                                      QEMU_TIMER_ATTR_ALL);
 -
 -        if (deadline == 0) {
 -            notify_aio_contexts();
 -        }
 -    }
 -}
 -
 -static void prepare_icount_for_run(CPUState *cpu)
 -{
 -    if (icount_enabled()) {
 -        int insns_left;
 -
 -        /*
 -         * These should always be cleared by process_icount_data after
 -         * each vCPU execution. However u16.high can be raised
 -         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
 -         */
 -        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
 -        g_assert(cpu->icount_extra == 0);
 -
 -        cpu->icount_budget = tcg_get_icount_limit();
 -        insns_left = MIN(0xffff, cpu->icount_budget);
 -        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
 -        cpu->icount_extra = cpu->icount_budget - insns_left;
 -
 -        replay_mutex_lock();
 -
 -        if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
 -            notify_aio_contexts();
 -        }
 -    }
 -}
 -
 -static void process_icount_data(CPUState *cpu)
 -{
 -    if (icount_enabled()) {
 -        /* Account for executed instructions */
 -        icount_update(cpu);
 -
 -        /* Reset the counters */
 -        cpu_neg(cpu)->icount_decr.u16.low = 0;
 -        cpu->icount_extra = 0;
 -        cpu->icount_budget = 0;
 -
 -        replay_account_executed_instructions();
 -
 -        replay_mutex_unlock();
 -    }
 -}
 -
 -static int tcg_cpu_exec(CPUState *cpu)
 -{
 -    int ret;
 -#ifdef CONFIG_PROFILER
 -    int64_t ti;
 -#endif
 -
 -    assert(tcg_enabled());
 -#ifdef CONFIG_PROFILER
 -    ti = profile_getclock();
 -#endif
 -    cpu_exec_start(cpu);
 -    ret = cpu_exec(cpu);
 -    cpu_exec_end(cpu);
 -#ifdef CONFIG_PROFILER
 -    qatomic_set(&tcg_ctx->prof.cpu_exec_time,
 -                tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
 -#endif
 -    return ret;
 -}
 -
 -/*
 - * Destroy any remaining vCPUs which have been unplugged and have
 - * finished running
 - */
 -static void deal_with_unplugged_cpus(void)
 -{
 -    CPUState *cpu;
 -
 -    CPU_FOREACH(cpu) {
 -        if (cpu->unplug && !cpu_can_run(cpu)) {
 -            qemu_tcg_destroy_vcpu(cpu);
 -            cpu_thread_signal_destroyed(cpu);
 -            break;
 -        }
 -    }
 -}
 -
 -/*
 - * Single-threaded TCG
 - *
 - * In the single-threaded case each vCPU is simulated in turn. If
 - * there is more than a single vCPU we create a simple timer to kick
 - * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 - * This is done explicitly rather than relying on side-effects
 - * elsewhere.
 - */
 -
 -static void *tcg_rr_cpu_thread_fn(void *arg)
 -{
 -    CPUState *cpu = arg;
 -
 -    assert(tcg_enabled());
 -    rcu_register_thread();
 -    tcg_register_thread();
 -
 -    qemu_mutex_lock_iothread();
 -    qemu_thread_get_self(cpu->thread);
 -
 -    cpu->thread_id = qemu_get_thread_id();
 -    cpu->can_do_io = 1;
 -    cpu_thread_signal_created(cpu);
 -    qemu_guest_random_seed_thread_part2(cpu->random_seed);
 -
 -    /* wait for initial kick-off after machine start */
 -    while (first_cpu->stopped) {
 -        qemu_cond_wait_iothread(first_cpu->halt_cond);
 -
 -        /* process any pending work */
 -        CPU_FOREACH(cpu) {
 -            current_cpu = cpu;
 -            qemu_wait_io_event_common(cpu);
 -        }
 -    }
 -
 -    start_tcg_kick_timer();
 -
 -    cpu = first_cpu;
 -
 -    /* process any pending work */
 -    cpu->exit_request = 1;
 -
 -    while (1) {
 -        qemu_mutex_unlock_iothread();
 -        replay_mutex_lock();
 -        qemu_mutex_lock_iothread();
 -        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
 -        icount_account_warp_timer();
 -
 -        /*
 -         * Run the timers here.  This is much more efficient than
 -         * waking up the I/O thread and waiting for completion.
 -         */
 -        handle_icount_deadline();
 -
 -        replay_mutex_unlock();
 -
 -        if (!cpu) {
 -            cpu = first_cpu;
 -        }
 -
 -        while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
 -
 -            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
 -            current_cpu = cpu;
 -
 -            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
 -                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
 -
 -            if (cpu_can_run(cpu)) {
 -                int r;
 -
 -                qemu_mutex_unlock_iothread();
 -                prepare_icount_for_run(cpu);
 -
 -                r = tcg_cpu_exec(cpu);
 -
 -                process_icount_data(cpu);
 -                qemu_mutex_lock_iothread();
 -
 -                if (r == EXCP_DEBUG) {
 -                    cpu_handle_guest_debug(cpu);
 -                    break;
 -                } else if (r == EXCP_ATOMIC) {
 -                    qemu_mutex_unlock_iothread();
 -                    cpu_exec_step_atomic(cpu);
 -                    qemu_mutex_lock_iothread();
 -                    break;
 -                }
 -            } else if (cpu->stop) {
 -                if (cpu->unplug) {
 -                    cpu = CPU_NEXT(cpu);
 -                }
 -                break;
 -            }
 -
 -            cpu = CPU_NEXT(cpu);
 -        } /* while (cpu && !cpu->exit_request).. */
 -
 -        /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
 -        qatomic_set(&tcg_current_rr_cpu, NULL);
 -
 -        if (cpu && cpu->exit_request) {
 -            qatomic_mb_set(&cpu->exit_request, 0);
 -        }
 -
 -        if (icount_enabled() && all_cpu_threads_idle()) {
 -            /*
 -             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
 -             * in the main_loop, wake it up in order to start the warp timer.
 -             */
 -            qemu_notify_event();
 -        }
 -
 -        qemu_tcg_rr_wait_io_event();
 -        deal_with_unplugged_cpus();
 -    }
 -
 -    rcu_unregister_thread();
 -    return NULL;
 -}
 -
 -/*
 - * Multi-threaded TCG
 - *
 - * In the multi-threaded case each vCPU has its own thread. The TLS
 - * variable current_cpu can be used deep in the code to find the
 - * current CPUState for a given thread.
 - */
 -
 -static void *tcg_cpu_thread_fn(void *arg)
 -{
 -    CPUState *cpu = arg;
 -
 -    assert(tcg_enabled());
 -    g_assert(!icount_enabled());
 -
 -    rcu_register_thread();
 -    tcg_register_thread();
 -
 -    qemu_mutex_lock_iothread();
 -    qemu_thread_get_self(cpu->thread);
 -
 -    cpu->thread_id = qemu_get_thread_id();
 -    cpu->can_do_io = 1;
 -    current_cpu = cpu;
 -    cpu_thread_signal_created(cpu);
 -    qemu_guest_random_seed_thread_part2(cpu->random_seed);
 -
 -    /* process any pending work */
 -    cpu->exit_request = 1;
 -
 -    do {
 -        if (cpu_can_run(cpu)) {
 -            int r;
 -            qemu_mutex_unlock_iothread();
 -            r = tcg_cpu_exec(cpu);
 -            qemu_mutex_lock_iothread();
 -            switch (r) {
 -            case EXCP_DEBUG:
 -                cpu_handle_guest_debug(cpu);
 -                break;
 -            case EXCP_HALTED:
 -                /*
 -                 * during start-up the vCPU is reset and the thread is
 -                 * kicked several times. If we don't ensure we go back
 -                 * to sleep in the halted state we won't cleanly
 -                 * start-up when the vCPU is enabled.
 -                 *
 -                 * cpu->halted should ensure we sleep in wait_io_event
 -                 */
 -                g_assert(cpu->halted);
 -                break;
 -            case EXCP_ATOMIC:
 -                qemu_mutex_unlock_iothread();
 -                cpu_exec_step_atomic(cpu);
 -                qemu_mutex_lock_iothread();
 -            default:
 -                /* Ignore everything else? */
 -                break;
 -            }
 -        }
 -
 -        qatomic_mb_set(&cpu->exit_request, 0);
 -        qemu_wait_io_event(cpu);
 -    } while (!cpu->unplug || cpu_can_run(cpu));
 -
 -    qemu_tcg_destroy_vcpu(cpu);
 -    cpu_thread_signal_destroyed(cpu);
 -    qemu_mutex_unlock_iothread();
 -    rcu_unregister_thread();
 -    return NULL;
 -}
 -
 -static void tcg_start_vcpu_thread(CPUState *cpu)
 +void tcg_start_vcpu_thread(CPUState *cpu)
  {
      char thread_name[VCPU_THREAD_NAME_SIZE];
      static QemuCond *single_tcg_halt_cond;
@@ -XXX,XX +XXX,XX @@ static void tcg_start_vcpu_thread(CPUState *cpu)
      }
  }
--static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
+-static int64_t tcg_get_virtual_clock(void)
-+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
++void qemu_tcg_destroy_vcpu(CPUState *cpu)
  {
-     tlb_mmu_resize_locked(env, mmu_idx);
+-    if (icount_enabled()) {
--    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
+-        return icount_get();
-     env_tlb(env)->d[mmu_idx].n_used_entries = 0;
+-    }
-+    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
+-    return cpu_get_clock();
-+    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
++    cpu_thread_signal_destroyed(cpu);
 +    env_tlb(env)->d[mmu_idx].vindex = 0;
 +    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
 +    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
 +           sizeof(env_tlb(env)->d[0].vtable));
  }
- static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
+-static int64_t tcg_get_elapsed_ticks(void)
-@@ -XXX,XX +XXX,XX @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
++int tcg_cpu_exec(CPUState *cpu)
-     *pelide = elide;
+ {
 -    if (icount_enabled()) {
 -        return icount_get();
 -    }
 -    return cpu_get_ticks();
 +    int ret;
 +#ifdef CONFIG_PROFILER
 +    int64_t ti;
 +#endif
 +    assert(tcg_enabled());
 +#ifdef CONFIG_PROFILER
 +    ti = profile_getclock();
 +#endif
 +    cpu_exec_start(cpu);
 +    ret = cpu_exec(cpu);
 +    cpu_exec_end(cpu);
 +#ifdef CONFIG_PROFILER
 +    qatomic_set(&tcg_ctx->prof.cpu_exec_time,
 +                tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
 +#endif
 +    return ret;
  }
--static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
+ /* mask must never be zero, except for A20 change call */
--{
+-static void tcg_handle_interrupt(CPUState *cpu, int mask)
--    tlb_table_flush_by_mmuidx(env, mmu_idx);
++void tcg_handle_interrupt(CPUState *cpu, int mask)
 -    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
 -    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
 -    env_tlb(env)->d[mmu_idx].vindex = 0;
 -    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
 -           sizeof(env_tlb(env)->d[0].vtable));
 -}
 -
  static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
  {
-     CPUArchState *env = cpu->env_ptr;
+-    int old_mask;
      g_assert(qemu_mutex_iothread_locked());
 -    old_mask = cpu->interrupt_request;
      cpu->interrupt_request |= mask;
      /*
@@ -XXX,XX +XXX,XX @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
          qemu_cpu_kick(cpu);
      } else {
          qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
 -        if (icount_enabled() &&
 -            !cpu->can_do_io
 -            && (mask & ~old_mask) != 0) {
 -            cpu_abort(cpu, "Raised interrupt while not in I/O function");
 -        }
      }
  }
 -
 -const CpusAccel tcg_cpus = {
 -    .create_vcpu_thread = tcg_start_vcpu_thread,
 -    .kick_vcpu_thread = tcg_kick_vcpu_thread,
 -
 -    .handle_interrupt = tcg_handle_interrupt,
 -
 -    .get_virtual_clock = tcg_get_virtual_clock,
 -    .get_elapsed_ticks = tcg_get_elapsed_ticks,
 -};
 diff --git a/softmmu/icount.c b/softmmu/icount.c
 index XXXXXXX..XXXXXXX 100644
 --- a/softmmu/icount.c
 +++ b/softmmu/icount.c
@@ -XXX,XX +XXX,XX @@ void icount_start_warp_timer(void)
  void icount_account_warp_timer(void)
  {
 -    if (!icount_enabled() || !icount_sleep) {
 +    if (!icount_sleep) {
          return;
      }
 diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/meson.build
 +++ b/accel/tcg/meson.build
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
  tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl])
  specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
 -specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files('tcg-all.c', 'cputlb.c', 'tcg-cpus.c'))
 +specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
 +  'tcg-all.c',
 +  'cputlb.c',
 +  'tcg-cpus.c',
 +  'tcg-cpus-mttcg.c',
 +  'tcg-cpus-icount.c',
 +  'tcg-cpus-rr.c'
 +))
 --
-.20.1
+.25.1

-[PULL 01/16] cputlb: Handle NB_MMU_MODES > TARGET_PAGE_BITS_MIN
+[PULL 2/3] accel/tcg: split tcg_start_vcpu_thread
-In target/arm we will shortly have "too many" mmu_idx.
+From: Claudio Fontana <cfontana@suse.de>
-The current minimum barrier is caused by the way in which
-tlb_flush_page_by_mmuidx is coded.
+after the initial split into 3 tcg variants, we proceed to also
+split tcg_start_vcpu_thread.
-We can remove this limitation by allocating memory for
-consumption by the worker.  Let us assume that this is
+We actually split it in 2 this time, since the icount variant
-the unlikely case, as will be the case for the majority
+just uses the round robin function.
-of targets which have so far satisfied the BUILD_BUG_ON,
-and only allocate memory when necessary.
+Suggested-by: Richard Henderson <richard.henderson@linaro.org>
+Signed-off-by: Claudio Fontana <cfontana@suse.de>
-Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
+Message-Id: <20201015143217.29337-3-cfontana@suse.de>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- accel/tcg/cputlb.c | 167 +++++++++++++++++++++++++++++++++++----------
+ accel/tcg/tcg-cpus-mttcg.h  | 21 --------------
-file changed, 132 insertions(+), 35 deletions(-)
+ accel/tcg/tcg-cpus-rr.h     |  3 +-
+ accel/tcg/tcg-cpus.h        |  1 -
-diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
+ accel/tcg/tcg-all.c         |  5 ++++
-index XXXXXXX..XXXXXXX 100644
+ accel/tcg/tcg-cpus-icount.c |  2 +-
---- a/accel/tcg/cputlb.c
+ accel/tcg/tcg-cpus-mttcg.c  | 29 +++++++++++++++++--
-+++ b/accel/tcg/cputlb.c
+ accel/tcg/tcg-cpus-rr.c     | 39 +++++++++++++++++++++++--
-@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
+ accel/tcg/tcg-cpus.c        | 58 -------------------------------------
-     }
+files changed, 71 insertions(+), 87 deletions(-)
  delete mode 100644 accel/tcg/tcg-cpus-mttcg.h
 diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
 deleted file mode 100644
 index XXXXXXX..XXXXXXX
 --- a/accel/tcg/tcg-cpus-mttcg.h
 +++ /dev/null
@@ -XXX,XX +XXX,XX @@
 -/*
 - * QEMU TCG Multi Threaded vCPUs implementation
 - *
 - * Copyright 2020 SUSE LLC
 - *
 - * This work is licensed under the terms of the GNU GPL, version 2 or later.
 - * See the COPYING file in the top-level directory.
 - */
 -
 -#ifndef TCG_CPUS_MTTCG_H
 -#define TCG_CPUS_MTTCG_H
 -
 -/*
 - * In the multi-threaded case each vCPU has its own thread. The TLS
 - * variable current_cpu can be used deep in the code to find the
 - * current CPUState for a given thread.
 - */
 -
 -void *tcg_cpu_thread_fn(void *arg);
 -
 -#endif /* TCG_CPUS_MTTCG_H */
 diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-rr.h
 +++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
  /* Kick all RR vCPUs. */
  void qemu_cpu_kick_rr_cpus(CPUState *unused);
 -void *tcg_rr_cpu_thread_fn(void *arg);
 +/* start the round robin vcpu thread */
 +void rr_start_vcpu_thread(CPUState *cpu);
  #endif /* TCG_CPUS_RR_H */
 diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus.h
 +++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
  extern const CpusAccel tcg_cpus_icount;
  extern const CpusAccel tcg_cpus_rr;
 -void tcg_start_vcpu_thread(CPUState *cpu);
  void qemu_tcg_destroy_vcpu(CPUState *cpu);
  int tcg_cpu_exec(CPUState *cpu);
  void tcg_handle_interrupt(CPUState *cpu, int mask);
 diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-all.c
 +++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
      tcg_exec_init(s->tb_size * 1024 * 1024);
      mttcg_enabled = s->mttcg_enabled;
 +    /*
 +     * Initialize TCG regions
 +     */
 +    tcg_region_init();
 +
      if (mttcg_enabled) {
          cpus_register_accel(&tcg_cpus_mttcg);
      } else if (icount_enabled()) {
 diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-icount.c
 +++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
  }
--/* As we are going to hijack the bottom bits of the page address for a
+ const CpusAccel tcg_cpus_icount = {
-- * mmuidx bit mask we need to fail to build if we can't do that
+-    .create_vcpu_thread = tcg_start_vcpu_thread,
-+/**
++    .create_vcpu_thread = rr_start_vcpu_thread,
-+ * tlb_flush_page_by_mmuidx_async_0:
+     .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
-+ * @cpu: cpu on which to flush
-+ * @addr: page of virtual address to flush
+     .handle_interrupt = icount_handle_interrupt,
-+ * @idxmap: set of mmu_idx to flush
+diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
-+ *
+index XXXXXXX..XXXXXXX 100644
-+ * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
+--- a/accel/tcg/tcg-cpus-mttcg.c
-+ * at @addr from the tlbs indicated by @idxmap from @cpu.
++++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
  #include "hw/boards.h"
  #include "tcg-cpus.h"
 -#include "tcg-cpus-mttcg.h"
  /*
   * In the multi-threaded case each vCPU has its own thread. The TLS
@@ -XXX,XX +XXX,XX @@
   * current CPUState for a given thread.
   */
--QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
--
+-void *tcg_cpu_thread_fn(void *arg)
--static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
++static void *tcg_cpu_thread_fn(void *arg)
 -                                                run_on_cpu_data data)
 +static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
 +                                             target_ulong addr,
 +                                             uint16_t idxmap)
  {
-     CPUArchState *env = cpu->env_ptr;
+     CPUState *cpu = arg;
--    target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
--    target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
+@@ -XXX,XX +XXX,XX @@ static void mttcg_kick_vcpu_thread(CPUState *cpu)
--    unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
+     cpu_exit(cpu);
      int mmu_idx;
      assert_cpu_is_self(cpu);
 -    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
 -              addr, mmu_idx_bitmap);
 +    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
      qemu_spin_lock(&env_tlb(env)->c.lock);
      for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 -        if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
 +        if ((idxmap >> mmu_idx) & 1) {
              tlb_flush_page_locked(env, mmu_idx, addr);
          }
      }
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
      tb_flush_jmp_cache(cpu, addr);
  }
-+/**
++static void mttcg_start_vcpu_thread(CPUState *cpu)
 + * tlb_flush_page_by_mmuidx_async_1:
 + * @cpu: cpu on which to flush
 + * @data: encoded addr + idxmap
 + *
 + * Helper for tlb_flush_page_by_mmuidx and friends, called through
 + * async_run_on_cpu.  The idxmap parameter is encoded in the page
 + * offset of the target_ptr field.  This limits the set of mmu_idx
 + * that can be passed via this method.
 + */
 +static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
 +                                             run_on_cpu_data data)
 +{
-+    target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
++    char thread_name[VCPU_THREAD_NAME_SIZE];
-+    target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
++
-+    uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
++    g_assert(tcg_enabled());
 +
-+    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
++    parallel_cpus = (current_machine->smp.max_cpus > 1);
 +
 +    cpu->thread = g_malloc0(sizeof(QemuThread));
 +    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
 +    qemu_cond_init(cpu->halt_cond);
 +
 +    /* create a thread per vCPU with TCG (MTTCG) */
 +    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
 +             cpu->cpu_index);
 +
 +    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
 +                       cpu, QEMU_THREAD_JOINABLE);
 +
 +#ifdef _WIN32
 +    cpu->hThread = qemu_thread_get_handle(cpu->thread);
 +#endif
 +}
 +
-+typedef struct {
+ const CpusAccel tcg_cpus_mttcg = {
-+    target_ulong addr;
+-    .create_vcpu_thread = tcg_start_vcpu_thread,
-+    uint16_t idxmap;
++    .create_vcpu_thread = mttcg_start_vcpu_thread,
-+} TLBFlushPageByMMUIdxData;
+     .kick_vcpu_thread = mttcg_kick_vcpu_thread,
-+
-+/**
+     .handle_interrupt = tcg_handle_interrupt,
-+ * tlb_flush_page_by_mmuidx_async_2:
+diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
-+ * @cpu: cpu on which to flush
+index XXXXXXX..XXXXXXX 100644
-+ * @data: allocated addr + idxmap
+--- a/accel/tcg/tcg-cpus-rr.c
-+ *
++++ b/accel/tcg/tcg-cpus-rr.c
-+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
+@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
-+ * async_run_on_cpu.  The addr+idxmap parameters are stored in a
+  * elsewhere.
-+ * TLBFlushPageByMMUIdxData structure that has been allocated
+  */
-+ * specifically for this helper.  Free the structure when done.
-+ */
+-void *tcg_rr_cpu_thread_fn(void *arg)
-+static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
++static void *tcg_rr_cpu_thread_fn(void *arg)
-+                                             run_on_cpu_data data)
+ {
      CPUState *cpu = arg;
@@ -XXX,XX +XXX,XX @@ void *tcg_rr_cpu_thread_fn(void *arg)
      return NULL;
  }
 +void rr_start_vcpu_thread(CPUState *cpu)
 +{
-+    TLBFlushPageByMMUIdxData *d = data.host_ptr;
++    char thread_name[VCPU_THREAD_NAME_SIZE];
-+
++    static QemuCond *single_tcg_halt_cond;
-+    tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
++    static QemuThread *single_tcg_cpu_thread;
-+    g_free(d);
++
 +    g_assert(tcg_enabled());
 +    parallel_cpus = false;
 +
 +    if (!single_tcg_cpu_thread) {
 +        cpu->thread = g_malloc0(sizeof(QemuThread));
 +        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
 +        qemu_cond_init(cpu->halt_cond);
 +
 +        /* share a single thread for all cpus with TCG */
 +        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
 +        qemu_thread_create(cpu->thread, thread_name,
 +                           tcg_rr_cpu_thread_fn,
 +                           cpu, QEMU_THREAD_JOINABLE);
 +
 +        single_tcg_halt_cond = cpu->halt_cond;
 +        single_tcg_cpu_thread = cpu->thread;
 +#ifdef _WIN32
 +        cpu->hThread = qemu_thread_get_handle(cpu->thread);
 +#endif
 +    } else {
 +        /* we share the thread */
 +        cpu->thread = single_tcg_cpu_thread;
 +        cpu->halt_cond = single_tcg_halt_cond;
 +        cpu->thread_id = first_cpu->thread_id;
 +        cpu->can_do_io = 1;
 +        cpu->created = true;
 +    }
 +}
 +
- void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
+ const CpusAccel tcg_cpus_rr = {
 -    .create_vcpu_thread = tcg_start_vcpu_thread,
 +    .create_vcpu_thread = rr_start_vcpu_thread,
      .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
      .handle_interrupt = tcg_handle_interrupt,
 diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus.c
 +++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
  #include "hw/boards.h"
  #include "tcg-cpus.h"
 -#include "tcg-cpus-mttcg.h"
 -#include "tcg-cpus-rr.h"
  /* common functionality among all TCG variants */
 -void tcg_start_vcpu_thread(CPUState *cpu)
 -{
 -    char thread_name[VCPU_THREAD_NAME_SIZE];
 -    static QemuCond *single_tcg_halt_cond;
 -    static QemuThread *single_tcg_cpu_thread;
 -    static int tcg_region_inited;
 -
 -    assert(tcg_enabled());
 -    /*
 -     * Initialize TCG regions--once. Now is a good time, because:
 -     * (1) TCG's init context, prologue and target globals have been set up.
 -     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
 -     *     -accel flag is processed, so the check doesn't work then).
 -     */
 -    if (!tcg_region_inited) {
 -        tcg_region_inited = 1;
 -        tcg_region_init();
 -        parallel_cpus = qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1;
 -    }
 -
 -    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
 -        cpu->thread = g_malloc0(sizeof(QemuThread));
 -        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
 -        qemu_cond_init(cpu->halt_cond);
 -
 -        if (qemu_tcg_mttcg_enabled()) {
 -            /* create a thread per vCPU with TCG (MTTCG) */
 -            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
 -                 cpu->cpu_index);
 -
 -            qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
 -                               cpu, QEMU_THREAD_JOINABLE);
 -
 -        } else {
 -            /* share a single thread for all cpus with TCG */
 -            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
 -            qemu_thread_create(cpu->thread, thread_name,
 -                               tcg_rr_cpu_thread_fn,
 -                               cpu, QEMU_THREAD_JOINABLE);
 -
 -            single_tcg_halt_cond = cpu->halt_cond;
 -            single_tcg_cpu_thread = cpu->thread;
 -        }
 -#ifdef _WIN32
 -        cpu->hThread = qemu_thread_get_handle(cpu->thread);
 -#endif
 -    } else {
 -        /* For non-MTTCG cases we share the thread */
 -        cpu->thread = single_tcg_cpu_thread;
 -        cpu->halt_cond = single_tcg_halt_cond;
 -        cpu->thread_id = first_cpu->thread_id;
 -        cpu->can_do_io = 1;
 -        cpu->created = true;
 -    }
 -}
 -
  void qemu_tcg_destroy_vcpu(CPUState *cpu)
  {
--    target_ulong addr_and_mmu_idx;
+     cpu_thread_signal_destroyed(cpu);
 -
      tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
      /* This should already be page aligned */
 -    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
 -    addr_and_mmu_idx |= idxmap;
 +    addr &= TARGET_PAGE_MASK;
 -    if (!qemu_cpu_is_self(cpu)) {
 -        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
 -                         RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 +    if (qemu_cpu_is_self(cpu)) {
 +        tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
 +    } else if (idxmap < TARGET_PAGE_SIZE) {
 +        /*
 +         * Most targets have only a few mmu_idx.  In the case where
 +         * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
 +         * allocating memory for this operation.
 +         */
 +        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
 +                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
      } else {
 -        tlb_flush_page_by_mmuidx_async_work(
 -            cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 +        TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
 +
 +        /* Otherwise allocate a structure, freed by the worker.  */
 +        d->addr = addr;
 +        d->idxmap = idxmap;
 +        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
 +                         RUN_ON_CPU_HOST_PTR(d));
      }
  }
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
  void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                         uint16_t idxmap)
  {
 -    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
 -    target_ulong addr_and_mmu_idx;
 -
      tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
      /* This should already be page aligned */
 -    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
 -    addr_and_mmu_idx |= idxmap;
 +    addr &= TARGET_PAGE_MASK;
 -    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 -    fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 +    /*
 +     * Allocate memory to hold addr+idxmap only when needed.
 +     * See tlb_flush_page_by_mmuidx for details.
 +     */
 +    if (idxmap < TARGET_PAGE_SIZE) {
 +        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
 +                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
 +    } else {
 +        CPUState *dst_cpu;
 +
 +        /* Allocate a separate data block for each destination cpu.  */
 +        CPU_FOREACH(dst_cpu) {
 +            if (dst_cpu != src_cpu) {
 +                TLBFlushPageByMMUIdxData *d
 +                    = g_new(TLBFlushPageByMMUIdxData, 1);
 +
 +                d->addr = addr;
 +                d->idxmap = idxmap;
 +                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
 +                                 RUN_ON_CPU_HOST_PTR(d));
 +            }
 +        }
 +    }
 +
 +    tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
  }
  void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                                target_ulong addr,
                                                uint16_t idxmap)
  {
 -    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
 -    target_ulong addr_and_mmu_idx;
 -
      tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
      /* This should already be page aligned */
 -    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
 -    addr_and_mmu_idx |= idxmap;
 +    addr &= TARGET_PAGE_MASK;
 -    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 -    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
 +    /*
 +     * Allocate memory to hold addr+idxmap only when needed.
 +     * See tlb_flush_page_by_mmuidx for details.
 +     */
 +    if (idxmap < TARGET_PAGE_SIZE) {
 +        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
 +                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
 +        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
 +                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
 +    } else {
 +        CPUState *dst_cpu;
 +        TLBFlushPageByMMUIdxData *d;
 +
 +        /* Allocate a separate data block for each destination cpu.  */
 +        CPU_FOREACH(dst_cpu) {
 +            if (dst_cpu != src_cpu) {
 +                d = g_new(TLBFlushPageByMMUIdxData, 1);
 +                d->addr = addr;
 +                d->idxmap = idxmap;
 +                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
 +                                 RUN_ON_CPU_HOST_PTR(d));
 +            }
 +        }
 +
 +        d = g_new(TLBFlushPageByMMUIdxData, 1);
 +        d->addr = addr;
 +        d->idxmap = idxmap;
 +        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
 +                              RUN_ON_CPU_HOST_PTR(d));
 +    }
  }
  void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
 --
-.20.1
+.25.1

-[PULL 02/16] util/cacheinfo: fix crash when compiling with uClibc
+Deleted patch
-From: Carlos Santos <casantos@redhat.com>
-uClibc defines _SC_LEVEL1_ICACHE_LINESIZE and _SC_LEVEL1_DCACHE_LINESIZE
-but the corresponding sysconf calls return -1, which is a valid result,
-meaning that the limit is indeterminate.
-Handle this situation using the fallback values instead of crashing due
-to an assertion failure.
-Signed-off-by: Carlos Santos <casantos@redhat.com>
-Message-Id: <20191017123713.30192-1-casantos@redhat.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- util/cacheinfo.c | 10 ++++++++--
-file changed, 8 insertions(+), 2 deletions(-)
-diff --git a/util/cacheinfo.c b/util/cacheinfo.c
-index XXXXXXX..XXXXXXX 100644
---- a/util/cacheinfo.c
-+++ b/util/cacheinfo.c
-@@ -XXX,XX +XXX,XX @@ static void sys_cache_info(int *isize, int *dsize)
- static void sys_cache_info(int *isize, int *dsize)
- {
- # ifdef _SC_LEVEL1_ICACHE_LINESIZE
--    *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
-+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
-+    if (tmp_isize > 0) {
-+        *isize = tmp_isize;
-+    }
- # endif
- # ifdef _SC_LEVEL1_DCACHE_LINESIZE
--    *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
-+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
-+    if (tmp_dsize > 0) {
-+        *dsize = tmp_dsize;
-+    }
- # endif
- }
- #endif /* sys_cache_info */
---
-.20.1

-[PULL 03/16] vl: Remove unused variable in configure_accelerators
+Deleted patch
-The accel_initialised variable no longer has any setters.
-Fixes: 6f6e1698a68c
-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- vl.c | 3 +--
-file changed, 1 insertion(+), 2 deletions(-)
-diff --git a/vl.c b/vl.c
-index XXXXXXX..XXXXXXX 100644
---- a/vl.c
-+++ b/vl.c
-@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
- {
-     const char *accel;
-     char **accel_list, **tmp;
--    bool accel_initialised = false;
-     bool init_failed = false;
-     qemu_opts_foreach(qemu_find_opts("icount"),
-@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
-         accel_list = g_strsplit(accel, ":", 0);
--        for (tmp = accel_list; !accel_initialised && tmp && *tmp; tmp++) {
-+        for (tmp = accel_list; tmp && *tmp; tmp++) {
-             /*
-              * Filter invalid accelerators here, to prevent obscenities
-              * such as "-machine accel=tcg,,thread=single".
---
-.20.1

-[PULL 04/16] vl: Reduce scope of variables in configure_accelerators
+Deleted patch
-The accel_list and tmp variables are only used when manufacturing
--machine accel, options based on -accel.
-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- vl.c | 3 ++-
-file changed, 2 insertions(+), 1 deletion(-)
-diff --git a/vl.c b/vl.c
-index XXXXXXX..XXXXXXX 100644
---- a/vl.c
-+++ b/vl.c
-@@ -XXX,XX +XXX,XX @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
- static void configure_accelerators(const char *progname)
- {
-     const char *accel;
--    char **accel_list, **tmp;
-     bool init_failed = false;
-     qemu_opts_foreach(qemu_find_opts("icount"),
-@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
-     accel = qemu_opt_get(qemu_get_machine_opts(), "accel");
-     if (QTAILQ_EMPTY(&qemu_accel_opts.head)) {
-+        char **accel_list, **tmp;
-+
-         if (accel == NULL) {
-             /* Select the default accelerator */
-             if (!accel_find("tcg") && !accel_find("kvm")) {
---
-.20.1

-[PULL 05/16] vl: Remove useless test in configure_accelerators
+Deleted patch
-The result of g_strsplit is never NULL.
-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- vl.c | 2 +-
-file changed, 1 insertion(+), 1 deletion(-)
-diff --git a/vl.c b/vl.c
-index XXXXXXX..XXXXXXX 100644
---- a/vl.c
-+++ b/vl.c
-@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
-         accel_list = g_strsplit(accel, ":", 0);
--        for (tmp = accel_list; tmp && *tmp; tmp++) {
-+        for (tmp = accel_list; *tmp; tmp++) {
-             /*
-              * Filter invalid accelerators here, to prevent obscenities
-              * such as "-machine accel=tcg,,thread=single".
---
-.20.1

-[PULL 06/16] vl: Only choose enabled accelerators in configure_accelerators
+Deleted patch
-By choosing "tcg:kvm" when kvm is not enabled, we generate
-an incorrect warning: "invalid accelerator kvm".
-At the same time, use g_str_has_suffix rather than open-coding
-the same operation.
-Presumably the inverse is also true with --disable-tcg.
-Fixes: 28a0961757fc
-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- vl.c | 21 +++++++++++++--------
-file changed, 13 insertions(+), 8 deletions(-)
-diff --git a/vl.c b/vl.c
-index XXXXXXX..XXXXXXX 100644
---- a/vl.c
-+++ b/vl.c
-@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
-         if (accel == NULL) {
-             /* Select the default accelerator */
--            if (!accel_find("tcg") && !accel_find("kvm")) {
--                error_report("No accelerator selected and"
--                             " no default accelerator available");
--                exit(1);
--            } else {
--                int pnlen = strlen(progname);
--                if (pnlen >= 3 && g_str_equal(&progname[pnlen - 3], "kvm")) {
-+            bool have_tcg = accel_find("tcg");
-+            bool have_kvm = accel_find("kvm");
-+
-+            if (have_tcg && have_kvm) {
-+                if (g_str_has_suffix(progname, "kvm")) {
-                     /* If the program name ends with "kvm", we prefer KVM */
-                     accel = "kvm:tcg";
-                 } else {
-                     accel = "tcg:kvm";
-                 }
-+            } else if (have_kvm) {
-+                accel = "kvm";
-+            } else if (have_tcg) {
-+                accel = "tcg";
-+            } else {
-+                error_report("No accelerator selected and"
-+                             " no default accelerator available");
-+                exit(1);
-             }
-         }
--
-         accel_list = g_strsplit(accel, ":", 0);
-         for (tmp = accel_list; *tmp; tmp++) {
---
-.20.1

-[PULL 08/16] cputlb: Make tlb_n_entries private to cputlb.c
+Deleted patch
-There are no users of this function outside cputlb.c,
-and its interface will change in the next patch.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- include/exec/cpu_ldst.h | 5 -----
- accel/tcg/cputlb.c      | 5 +++++
-files changed, 5 insertions(+), 5 deletions(-)
-diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/exec/cpu_ldst.h
-+++ b/include/exec/cpu_ldst.h
-@@ -XXX,XX +XXX,XX @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
-     return (addr >> TARGET_PAGE_BITS) & size_mask;
- }
--static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
--{
--    return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
--}
--
- /* Find the TLB entry corresponding to the mmu_idx + address pair.  */
- static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
-                                      target_ulong addr)
-diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
-index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/cputlb.c
-+++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
- QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
- #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
-+static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
-+{
-+    return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
-+}
-+
- static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
- {
-     return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
---
-.20.1

-[PULL 09/16] cputlb: Pass CPUTLBDescFast to tlb_n_entries and sizeof_tlb
+Deleted patch
-We do not need the entire CPUArchState to compute these values.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- accel/tcg/cputlb.c | 15 ++++++++-------
-file changed, 8 insertions(+), 7 deletions(-)
-diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
-index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/cputlb.c
-+++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
- QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
- #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
--static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
-+static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
- {
--    return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
-+    return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
- }
--static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
-+static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
- {
--    return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
-+    return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
- }
- static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
-@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
- static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
- {
-     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
--    size_t old_size = tlb_n_entries(env, mmu_idx);
-+    size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
-     size_t rate;
-     size_t new_size = old_size;
-     int64_t now = get_clock_realtime();
-@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
-     env_tlb(env)->d[mmu_idx].large_page_addr = -1;
-     env_tlb(env)->d[mmu_idx].large_page_mask = -1;
-     env_tlb(env)->d[mmu_idx].vindex = 0;
--    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
-+    memset(env_tlb(env)->f[mmu_idx].table, -1,
-+           sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
-     memset(env_tlb(env)->d[mmu_idx].vtable, -1,
-            sizeof(env_tlb(env)->d[0].vtable));
- }
-@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
-     qemu_spin_lock(&env_tlb(env)->c.lock);
-     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-         unsigned int i;
--        unsigned int n = tlb_n_entries(env, mmu_idx);
-+        unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
-         for (i = 0; i < n; i++) {
-             tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
---
-.20.1

-[PULL 10/16] cputlb: Hoist tlb portions in tlb_mmu_resize_locked
+Deleted patch
-No functional change, but the smaller expressions make
-the code easier to read.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- accel/tcg/cputlb.c | 35 +++++++++++++++++------------------
-file changed, 17 insertions(+), 18 deletions(-)
-diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
-index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/cputlb.c
-+++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
- /**
-  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
-- * @env: CPU that owns the TLB
-- * @mmu_idx: MMU index of the TLB
-+ * @desc: The CPUTLBDesc portion of the TLB
-+ * @fast: The CPUTLBDescFast portion of the same TLB
-  *
-  * Called with tlb_lock_held.
-  *
-@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
-  * high), since otherwise we are likely to have a significant amount of
-  * conflict misses.
-  */
--static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
-+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
- {
--    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
--    size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
-+    size_t old_size = tlb_n_entries(fast);
-     size_t rate;
-     size_t new_size = old_size;
-     int64_t now = get_clock_realtime();
-@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
-         return;
-     }
--    g_free(env_tlb(env)->f[mmu_idx].table);
--    g_free(env_tlb(env)->d[mmu_idx].iotlb);
-+    g_free(fast->table);
-+    g_free(desc->iotlb);
-     tlb_window_reset(desc, now, 0);
-     /* desc->n_used_entries is cleared by the caller */
--    env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
--    env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
--    env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
-+    fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
-+    fast->table = g_try_new(CPUTLBEntry, new_size);
-+    desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
-+
-     /*
-      * If the allocations fail, try smaller sizes. We just freed some
-      * memory, so going back to half of new_size has a good chance of working.
-@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
-      * allocations to fail though, so we progressively reduce the allocation
-      * size, aborting if we cannot even allocate the smallest TLB we support.
-      */
--    while (env_tlb(env)->f[mmu_idx].table == NULL ||
--           env_tlb(env)->d[mmu_idx].iotlb == NULL) {
-+    while (fast->table == NULL || desc->iotlb == NULL) {
-         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
-             error_report("%s: %s", __func__, strerror(errno));
-             abort();
-         }
-         new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
--        env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
-+        fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
--        g_free(env_tlb(env)->f[mmu_idx].table);
--        g_free(env_tlb(env)->d[mmu_idx].iotlb);
--        env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
--        env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
-+        g_free(fast->table);
-+        g_free(desc->iotlb);
-+        fast->table = g_try_new(CPUTLBEntry, new_size);
-+        desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
-     }
- }
- static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
- {
--    tlb_mmu_resize_locked(env, mmu_idx);
-+    tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
-     env_tlb(env)->d[mmu_idx].n_used_entries = 0;
-     env_tlb(env)->d[mmu_idx].large_page_addr = -1;
-     env_tlb(env)->d[mmu_idx].large_page_mask = -1;
---
-.20.1

-[PULL 11/16] cputlb: Hoist tlb portions in tlb_flush_one_mmuidx_locked
+Deleted patch
-No functional change, but the smaller expressions make
-the code easier to read.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- accel/tcg/cputlb.c | 19 ++++++++++---------
-file changed, 10 insertions(+), 9 deletions(-)
-diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
-index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/cputlb.c
-+++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
- static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
- {
--    tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
--    env_tlb(env)->d[mmu_idx].n_used_entries = 0;
--    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
--    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
--    env_tlb(env)->d[mmu_idx].vindex = 0;
--    memset(env_tlb(env)->f[mmu_idx].table, -1,
--           sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
--    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
--           sizeof(env_tlb(env)->d[0].vtable));
-+    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
-+    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
-+
-+    tlb_mmu_resize_locked(desc, fast);
-+    desc->n_used_entries = 0;
-+    desc->large_page_addr = -1;
-+    desc->large_page_mask = -1;
-+    desc->vindex = 0;
-+    memset(fast->table, -1, sizeof_tlb(fast));
-+    memset(desc->vtable, -1, sizeof(desc->vtable));
- }
- static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
---
-.20.1

-[PULL 12/16] cputlb: Split out tlb_mmu_flush_locked
+Deleted patch
-We will want to be able to flush a tlb without resizing.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- accel/tcg/cputlb.c | 15 ++++++++++-----
-file changed, 10 insertions(+), 5 deletions(-)
-diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
-index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/cputlb.c
-+++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
-     }
- }
--static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
-+static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
- {
--    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
--    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
--
--    tlb_mmu_resize_locked(desc, fast);
-     desc->n_used_entries = 0;
-     desc->large_page_addr = -1;
-     desc->large_page_mask = -1;
-@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
-     memset(desc->vtable, -1, sizeof(desc->vtable));
- }
-+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
-+{
-+    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
-+    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
-+
-+    tlb_mmu_resize_locked(desc, fast);
-+    tlb_mmu_flush_locked(desc, fast);
-+}
-+
- static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
- {
-     env_tlb(env)->d[mmu_idx].n_used_entries++;
---
-.20.1

-[PULL 13/16] cputlb: Partially merge tlb_dyn_init into tlb_init
+Deleted patch
-Merge into the only caller, but at the same time split
-out tlb_mmu_init to initialize a single tlb entry.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- accel/tcg/cputlb.c | 33 ++++++++++++++++-----------------
-file changed, 16 insertions(+), 17 deletions(-)
-diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
-index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/cputlb.c
-+++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
-     desc->window_max_entries = max_entries;
- }
--static void tlb_dyn_init(CPUArchState *env)
--{
--    int i;
--
--    for (i = 0; i < NB_MMU_MODES; i++) {
--        CPUTLBDesc *desc = &env_tlb(env)->d[i];
--        size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
--
--        tlb_window_reset(desc, get_clock_realtime(), 0);
--        desc->n_used_entries = 0;
--        env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
--        env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
--        env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
--    }
--}
--
- /**
-  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
-  * @desc: The CPUTLBDesc portion of the TLB
-@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
-     tlb_mmu_flush_locked(desc, fast);
- }
-+static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
-+{
-+    size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
-+
-+    tlb_window_reset(desc, now, 0);
-+    desc->n_used_entries = 0;
-+    fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
-+    fast->table = g_new(CPUTLBEntry, n_entries);
-+    desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
-+}
-+
- static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
- {
-     env_tlb(env)->d[mmu_idx].n_used_entries++;
-@@ -XXX,XX +XXX,XX @@ static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
- void tlb_init(CPUState *cpu)
- {
-     CPUArchState *env = cpu->env_ptr;
-+    int64_t now = get_clock_realtime();
-+    int i;
-     qemu_spin_init(&env_tlb(env)->c.lock);
-     /* Ensure that cpu_reset performs a full flush.  */
-     env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
--    tlb_dyn_init(env);
-+    for (i = 0; i < NB_MMU_MODES; i++) {
-+        tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
-+    }
- }
- /* flush_all_helper: run fn across all cpus
---
-.20.1

-[PULL 14/16] cputlb: Initialize tlbs as flushed
+Deleted patch
-There's little point in leaving these data structures half initialized,
-and relying on a flush to be done during reset.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- accel/tcg/cputlb.c | 5 +++--
-file changed, 3 insertions(+), 2 deletions(-)
-diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
-index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/cputlb.c
-+++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
-     fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
-     fast->table = g_new(CPUTLBEntry, n_entries);
-     desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
-+    tlb_mmu_flush_locked(desc, fast);
- }
- static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
-@@ -XXX,XX +XXX,XX @@ void tlb_init(CPUState *cpu)
-     qemu_spin_init(&env_tlb(env)->c.lock);
--    /* Ensure that cpu_reset performs a full flush.  */
--    env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
-+    /* All tlbs are initialized flushed. */
-+    env_tlb(env)->c.dirty = 0;
-     for (i = 0; i < NB_MMU_MODES; i++) {
-         tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
---
-.20.1

-[PULL 15/16] cputlb: Hoist timestamp outside of loops over tlbs
+Deleted patch
-Do not call get_clock_realtime() in tlb_mmu_resize_locked,
-but hoist it outside of any loop over a set of tlbs.  There are
-only two (indirect) callers, tlb_flush_by_mmuidx_async_work
-and tlb_flush_page_locked, so this is not onerous.
-Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- accel/tcg/cputlb.c | 14 ++++++++------
-file changed, 8 insertions(+), 6 deletions(-)
-diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
-index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/cputlb.c
-+++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
-  * high), since otherwise we are likely to have a significant amount of
-  * conflict misses.
-  */
--static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
-+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
-+                                  int64_t now)
- {
-     size_t old_size = tlb_n_entries(fast);
-     size_t rate;
-     size_t new_size = old_size;
--    int64_t now = get_clock_realtime();
-     int64_t window_len_ms = 100;
-     int64_t window_len_ns = window_len_ms * 1000 * 1000;
-     bool window_expired = now > desc->window_begin_ns + window_len_ns;
-@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
-     memset(desc->vtable, -1, sizeof(desc->vtable));
- }
--static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
-+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
-+                                        int64_t now)
- {
-     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
-     CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
--    tlb_mmu_resize_locked(desc, fast);
-+    tlb_mmu_resize_locked(desc, fast, now);
-     tlb_mmu_flush_locked(desc, fast);
- }
-@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
-     CPUArchState *env = cpu->env_ptr;
-     uint16_t asked = data.host_int;
-     uint16_t all_dirty, work, to_clean;
-+    int64_t now = get_clock_realtime();
-     assert_cpu_is_self(cpu);
-@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
-     for (work = to_clean; work != 0; work &= work - 1) {
-         int mmu_idx = ctz32(work);
--        tlb_flush_one_mmuidx_locked(env, mmu_idx);
-+        tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
-     }
-     qemu_spin_unlock(&env_tlb(env)->c.lock);
-@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
-         tlb_debug("forcing full flush midx %d ("
-                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
-                   midx, lp_addr, lp_mask);
--        tlb_flush_one_mmuidx_locked(env, midx);
-+        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
-     } else {
-         if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
-             tlb_n_used_entries_dec(env, midx);
---
-.20.1

-[PULL 16/16] scripts/git.orderfile: Display decodetree before C source
+[PULL 3/3] accel/tcg: rename tcg-cpus functions to match module name
-From: Philippe Mathieu-Daudé <philmd@redhat.com>
+From: Claudio Fontana <cfontana@suse.de>
-To avoid scrolling each instruction when reviewing tcg
+Signed-off-by: Claudio Fontana <cfontana@suse.de>
-helpers written for the decodetree script, display the
+Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
-.decode files (similar to header declarations) before
+Message-Id: <20201015143217.29337-4-cfontana@suse.de>
 the C source (implementation of previous declarations).
 Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
 Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
 Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 Message-Id: <20191230082856.30556-1-philmd@redhat.com>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- scripts/git.orderfile | 3 +++
+ accel/tcg/tcg-cpus-icount.h |  6 +--
-file changed, 3 insertions(+)
+ accel/tcg/tcg-cpus-rr.h     |  2 +-
  accel/tcg/tcg-cpus.h        |  6 +--
  accel/tcg/tcg-cpus-icount.c | 24 ++++++------
  accel/tcg/tcg-cpus-mttcg.c  | 10 ++---
  accel/tcg/tcg-cpus-rr.c     | 74 ++++++++++++++++++-------------------
  accel/tcg/tcg-cpus.c        |  6 +--
 files changed, 64 insertions(+), 64 deletions(-)
-diff --git a/scripts/git.orderfile b/scripts/git.orderfile
+diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
 index XXXXXXX..XXXXXXX 100644
---- a/scripts/git.orderfile
+--- a/accel/tcg/tcg-cpus-icount.h
-+++ b/scripts/git.orderfile
++++ b/accel/tcg/tcg-cpus-icount.h
-@@ -XXX,XX +XXX,XX @@ qga/*.json
+@@ -XXX,XX +XXX,XX @@
- # headers
+ #ifndef TCG_CPUS_ICOUNT_H
- *.h
+ #define TCG_CPUS_ICOUNT_H
-+# decoding tree specification
+-void handle_icount_deadline(void);
-+*.decode
+-void prepare_icount_for_run(CPUState *cpu);
-+
+-void process_icount_data(CPUState *cpu);
- # code
++void icount_handle_deadline(void);
- *.c
++void icount_prepare_for_run(CPUState *cpu);
 +void icount_process_data(CPUState *cpu);
  #endif /* TCG_CPUS_ICOUNT_H */
 diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-rr.h
 +++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
  #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
  /* Kick all RR vCPUs. */
 -void qemu_cpu_kick_rr_cpus(CPUState *unused);
 +void rr_kick_vcpu_thread(CPUState *unused);
  /* start the round robin vcpu thread */
  void rr_start_vcpu_thread(CPUState *cpu);
 diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus.h
 +++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
  extern const CpusAccel tcg_cpus_icount;
  extern const CpusAccel tcg_cpus_rr;
 -void qemu_tcg_destroy_vcpu(CPUState *cpu);
 -int tcg_cpu_exec(CPUState *cpu);
 -void tcg_handle_interrupt(CPUState *cpu, int mask);
 +void tcg_cpus_destroy(CPUState *cpu);
 +int tcg_cpus_exec(CPUState *cpu);
 +void tcg_cpus_handle_interrupt(CPUState *cpu, int mask);
  #endif /* TCG_CPUS_H */
 diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-icount.c
 +++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
  #include "tcg-cpus-icount.h"
  #include "tcg-cpus-rr.h"
 -static int64_t tcg_get_icount_limit(void)
 +static int64_t icount_get_limit(void)
  {
      int64_t deadline;
@@ -XXX,XX +XXX,XX @@ static int64_t tcg_get_icount_limit(void)
      }
  }
 -static void notify_aio_contexts(void)
 +static void icount_notify_aio_contexts(void)
  {
      /* Wake up other AioContexts.  */
      qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
      qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
  }
 -void handle_icount_deadline(void)
 +void icount_handle_deadline(void)
  {
      assert(qemu_in_vcpu_thread());
      int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                    QEMU_TIMER_ATTR_ALL);
      if (deadline == 0) {
 -        notify_aio_contexts();
 +        icount_notify_aio_contexts();
      }
  }
 -void prepare_icount_for_run(CPUState *cpu)
 +void icount_prepare_for_run(CPUState *cpu)
  {
      int insns_left;
      /*
 -     * These should always be cleared by process_icount_data after
 +     * These should always be cleared by icount_process_data after
       * each vCPU execution. However u16.high can be raised
 -     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
 +     * asynchronously by cpu_exit/cpu_interrupt/tcg_cpus_handle_interrupt
       */
      g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
      g_assert(cpu->icount_extra == 0);
 -    cpu->icount_budget = tcg_get_icount_limit();
 +    cpu->icount_budget = icount_get_limit();
      insns_left = MIN(0xffff, cpu->icount_budget);
      cpu_neg(cpu)->icount_decr.u16.low = insns_left;
      cpu->icount_extra = cpu->icount_budget - insns_left;
@@ -XXX,XX +XXX,XX @@ void prepare_icount_for_run(CPUState *cpu)
      replay_mutex_lock();
      if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
 -        notify_aio_contexts();
 +        icount_notify_aio_contexts();
      }
  }
 -void process_icount_data(CPUState *cpu)
 +void icount_process_data(CPUState *cpu)
  {
      /* Account for executed instructions */
      icount_update(cpu);
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
  {
      int old_mask = cpu->interrupt_request;
 -    tcg_handle_interrupt(cpu, mask);
 +    tcg_cpus_handle_interrupt(cpu, mask);
      if (qemu_cpu_is_self(cpu) &&
          !cpu->can_do_io
          && (mask & ~old_mask) != 0) {
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
  const CpusAccel tcg_cpus_icount = {
      .create_vcpu_thread = rr_start_vcpu_thread,
 -    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 +    .kick_vcpu_thread = rr_kick_vcpu_thread,
      .handle_interrupt = icount_handle_interrupt,
      .get_virtual_clock = icount_get,
 diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-mttcg.c
 +++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
   * current CPUState for a given thread.
   */
 -static void *tcg_cpu_thread_fn(void *arg)
 +static void *mttcg_cpu_thread_fn(void *arg)
  {
      CPUState *cpu = arg;
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
          if (cpu_can_run(cpu)) {
              int r;
              qemu_mutex_unlock_iothread();
 -            r = tcg_cpu_exec(cpu);
 +            r = tcg_cpus_exec(cpu);
              qemu_mutex_lock_iothread();
              switch (r) {
              case EXCP_DEBUG:
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
          qemu_wait_io_event(cpu);
      } while (!cpu->unplug || cpu_can_run(cpu));
 -    qemu_tcg_destroy_vcpu(cpu);
 +    tcg_cpus_destroy(cpu);
      qemu_mutex_unlock_iothread();
      rcu_unregister_thread();
      return NULL;
@@ -XXX,XX +XXX,XX @@ static void mttcg_start_vcpu_thread(CPUState *cpu)
      snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
               cpu->cpu_index);
 -    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
 +    qemu_thread_create(cpu->thread, thread_name, mttcg_cpu_thread_fn,
                         cpu, QEMU_THREAD_JOINABLE);
  #ifdef _WIN32
@@ -XXX,XX +XXX,XX @@ const CpusAccel tcg_cpus_mttcg = {
      .create_vcpu_thread = mttcg_start_vcpu_thread,
      .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 -    .handle_interrupt = tcg_handle_interrupt,
 +    .handle_interrupt = tcg_cpus_handle_interrupt,
  };
 diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus-rr.c
 +++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
  #include "tcg-cpus-icount.h"
  /* Kick all RR vCPUs */
 -void qemu_cpu_kick_rr_cpus(CPUState *unused)
 +void rr_kick_vcpu_thread(CPUState *unused)
  {
      CPUState *cpu;
@@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick_rr_cpus(CPUState *unused)
   * idleness is complete.
   */
 -static QEMUTimer *tcg_kick_vcpu_timer;
 -static CPUState *tcg_current_rr_cpu;
 +static QEMUTimer *rr_kick_vcpu_timer;
 +static CPUState *rr_current_cpu;
  #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 -static inline int64_t qemu_tcg_next_kick(void)
 +static inline int64_t rr_next_kick_time(void)
  {
      return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
  }
  /* Kick the currently round-robin scheduled vCPU to next */
 -static void qemu_cpu_kick_rr_next_cpu(void)
 +static void rr_kick_next_cpu(void)
  {
      CPUState *cpu;
      do {
 -        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
 +        cpu = qatomic_mb_read(&rr_current_cpu);
          if (cpu) {
              cpu_exit(cpu);
          }
 -    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
 +    } while (cpu != qatomic_mb_read(&rr_current_cpu));
  }
 -static void kick_tcg_thread(void *opaque)
 +static void rr_kick_thread(void *opaque)
  {
 -    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 -    qemu_cpu_kick_rr_next_cpu();
 +    timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
 +    rr_kick_next_cpu();
  }
 -static void start_tcg_kick_timer(void)
 +static void rr_start_kick_timer(void)
  {
 -    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 -        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 -                                           kick_tcg_thread, NULL);
 +    if (!rr_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 +        rr_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 +                                           rr_kick_thread, NULL);
      }
 -    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
 -        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 +    if (rr_kick_vcpu_timer && !timer_pending(rr_kick_vcpu_timer)) {
 +        timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
      }
  }
 -static void stop_tcg_kick_timer(void)
 +static void rr_stop_kick_timer(void)
  {
 -    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
 -        timer_del(tcg_kick_vcpu_timer);
 +    if (rr_kick_vcpu_timer && timer_pending(rr_kick_vcpu_timer)) {
 +        timer_del(rr_kick_vcpu_timer);
      }
  }
 -static void qemu_tcg_rr_wait_io_event(void)
 +static void rr_wait_io_event(void)
  {
      CPUState *cpu;
      while (all_cpu_threads_idle()) {
 -        stop_tcg_kick_timer();
 +        rr_stop_kick_timer();
          qemu_cond_wait_iothread(first_cpu->halt_cond);
      }
 -    start_tcg_kick_timer();
 +    rr_start_kick_timer();
      CPU_FOREACH(cpu) {
          qemu_wait_io_event_common(cpu);
@@ -XXX,XX +XXX,XX @@ static void qemu_tcg_rr_wait_io_event(void)
   * Destroy any remaining vCPUs which have been unplugged and have
   * finished running
   */
 -static void deal_with_unplugged_cpus(void)
 +static void rr_deal_with_unplugged_cpus(void)
  {
      CPUState *cpu;
      CPU_FOREACH(cpu) {
          if (cpu->unplug && !cpu_can_run(cpu)) {
 -            qemu_tcg_destroy_vcpu(cpu);
 +            tcg_cpus_destroy(cpu);
              break;
          }
      }
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
   * elsewhere.
   */
 -static void *tcg_rr_cpu_thread_fn(void *arg)
 +static void *rr_cpu_thread_fn(void *arg)
  {
      CPUState *cpu = arg;
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
          }
      }
 -    start_tcg_kick_timer();
 +    rr_start_kick_timer();
      cpu = first_cpu;
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
               * Run the timers here.  This is much more efficient than
               * waking up the I/O thread and waiting for completion.
               */
 -            handle_icount_deadline();
 +            icount_handle_deadline();
          }
          replay_mutex_unlock();
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
          while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
 -            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
 +            qatomic_mb_set(&rr_current_cpu, cpu);
              current_cpu = cpu;
              qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
                  qemu_mutex_unlock_iothread();
                  if (icount_enabled()) {
 -                    prepare_icount_for_run(cpu);
 +                    icount_prepare_for_run(cpu);
                  }
 -                r = tcg_cpu_exec(cpu);
 +                r = tcg_cpus_exec(cpu);
                  if (icount_enabled()) {
 -                    process_icount_data(cpu);
 +                    icount_process_data(cpu);
                  }
                  qemu_mutex_lock_iothread();
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
          } /* while (cpu && !cpu->exit_request).. */
          /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
 -        qatomic_set(&tcg_current_rr_cpu, NULL);
 +        qatomic_set(&rr_current_cpu, NULL);
          if (cpu && cpu->exit_request) {
              qatomic_mb_set(&cpu->exit_request, 0);
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
              qemu_notify_event();
          }
 -        qemu_tcg_rr_wait_io_event();
 -        deal_with_unplugged_cpus();
 +        rr_wait_io_event();
 +        rr_deal_with_unplugged_cpus();
      }
      rcu_unregister_thread();
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
          /* share a single thread for all cpus with TCG */
          snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
          qemu_thread_create(cpu->thread, thread_name,
 -                           tcg_rr_cpu_thread_fn,
 +                           rr_cpu_thread_fn,
                             cpu, QEMU_THREAD_JOINABLE);
          single_tcg_halt_cond = cpu->halt_cond;
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
  const CpusAccel tcg_cpus_rr = {
      .create_vcpu_thread = rr_start_vcpu_thread,
 -    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 +    .kick_vcpu_thread = rr_kick_vcpu_thread,
 -    .handle_interrupt = tcg_handle_interrupt,
 +    .handle_interrupt = tcg_cpus_handle_interrupt,
  };
 diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/tcg-cpus.c
 +++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
  /* common functionality among all TCG variants */
 -void qemu_tcg_destroy_vcpu(CPUState *cpu)
 +void tcg_cpus_destroy(CPUState *cpu)
  {
      cpu_thread_signal_destroyed(cpu);
  }
 -int tcg_cpu_exec(CPUState *cpu)
 +int tcg_cpus_exec(CPUState *cpu)
  {
      int ret;
  #ifdef CONFIG_PROFILER
@@ -XXX,XX +XXX,XX @@ int tcg_cpu_exec(CPUState *cpu)
  }
  /* mask must never be zero, except for A20 change call */
 -void tcg_handle_interrupt(CPUState *cpu, int mask)
 +void tcg_cpus_handle_interrupt(CPUState *cpu, int mask)
  {
      g_assert(qemu_mutex_iothread_locked());
 --
-.20.1
+.25.1

The following changes since commit 3e08b2b9cb64bff2b73fa9128c0e49bfcde0dd40:

Merge remote-tracking branch 'remotes/philmd-gitlab/tags/edk2-next-20200121' into staging (2020-01-21 15:29:25 +0000)

are available in the Git repository at:

https://github.com/rth7680/qemu.git tags/pull-tcg-20200121

for you to fetch changes up to 75fa376cdab5e5db2c7fdd107358e16f95503ac6:

scripts/git.orderfile: Display decodetree before C source (2020-01-21 15:26:09 -1000)

----------------------------------------------------------------
Remove another limit to NB_MMU_MODES.
Fix compilation using uclibc.
Fix defaulting of -accel parameters.
Tidy cputlb basic routines.
Adjust git.orderfile for decodetree.

----------------------------------------------------------------
Carlos Santos (1):
      util/cacheinfo: fix crash when compiling with uClibc

Philippe Mathieu-Daudé (1):
      scripts/git.orderfile: Display decodetree before C source

Richard Henderson (14):
      cputlb: Handle NB_MMU_MODES > TARGET_PAGE_BITS_MIN
      vl: Remove unused variable in configure_accelerators
      vl: Reduce scope of variables in configure_accelerators
      vl: Remove useless test in configure_accelerators
      vl: Only choose enabled accelerators in configure_accelerators
      cputlb: Merge tlb_table_flush_by_mmuidx into tlb_flush_one_mmuidx_locked
      cputlb: Make tlb_n_entries private to cputlb.c
      cputlb: Pass CPUTLBDescFast to tlb_n_entries and sizeof_tlb
      cputlb: Hoist tlb portions in tlb_mmu_resize_locked
      cputlb: Hoist tlb portions in tlb_flush_one_mmuidx_locked
      cputlb: Split out tlb_mmu_flush_locked
      cputlb: Partially merge tlb_dyn_init into tlb_init
      cputlb: Initialize tlbs as flushed
      cputlb: Hoist timestamp outside of loops over tlbs

In target/arm we will shortly have "too many" mmu_idx.
The current minimum barrier is caused by the way in which
tlb_flush_page_by_mmuidx is coded.

We can remove this limitation by allocating memory for
consumption by the worker.  Let us assume that this is
the unlikely case, as will be the case for the majority
of targets which have so far satisfied the BUILD_BUG_ON,
and only allocate memory when necessary.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 167 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 132 insertions(+), 35 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
     }
 }
 
-/* As we are going to hijack the bottom bits of the page address for a
- * mmuidx bit mask we need to fail to build if we can't do that
+/**
+ * tlb_flush_page_by_mmuidx_async_0:
+ * @cpu: cpu on which to flush
+ * @addr: page of virtual address to flush
+ * @idxmap: set of mmu_idx to flush
+ *
+ * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
+ * at @addr from the tlbs indicated by @idxmap from @cpu.
  */
-QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
-
-static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
-                                                run_on_cpu_data data)
+static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
+                                             target_ulong addr,
+                                             uint16_t idxmap)
 {
     CPUArchState *env = cpu->env_ptr;
-    target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
-    target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
-    unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
     int mmu_idx;
 
     assert_cpu_is_self(cpu);
 
-    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
-              addr, mmu_idx_bitmap);
+    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
 
     qemu_spin_lock(&env_tlb(env)->c.lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-        if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
+        if ((idxmap >> mmu_idx) & 1) {
             tlb_flush_page_locked(env, mmu_idx, addr);
         }
     }
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
     tb_flush_jmp_cache(cpu, addr);
 }
 
+/**
+ * tlb_flush_page_by_mmuidx_async_1:
+ * @cpu: cpu on which to flush
+ * @data: encoded addr + idxmap
+ *
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
+ * async_run_on_cpu.  The idxmap parameter is encoded in the page
+ * offset of the target_ptr field.  This limits the set of mmu_idx
+ * that can be passed via this method.
+ */
+static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
+                                             run_on_cpu_data data)
+{
+    target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
+    target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
+    uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
+
+    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
+}
+
+typedef struct {
+    target_ulong addr;
+    uint16_t idxmap;
+} TLBFlushPageByMMUIdxData;
+
+/**
+ * tlb_flush_page_by_mmuidx_async_2:
+ * @cpu: cpu on which to flush
+ * @data: allocated addr + idxmap
+ *
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
+ * async_run_on_cpu.  The addr+idxmap parameters are stored in a
+ * TLBFlushPageByMMUIdxData structure that has been allocated
+ * specifically for this helper.  Free the structure when done.
+ */
+static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
+                                             run_on_cpu_data data)
+{
+    TLBFlushPageByMMUIdxData *d = data.host_ptr;
+
+    tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
+    g_free(d);
+}
+
 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
 {
-    target_ulong addr_and_mmu_idx;
-
     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
 
     /* This should already be page aligned */
-    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
-    addr_and_mmu_idx |= idxmap;
+    addr &= TARGET_PAGE_MASK;
 
-    if (!qemu_cpu_is_self(cpu)) {
-        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
-                         RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+    if (qemu_cpu_is_self(cpu)) {
+        tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
+    } else if (idxmap < TARGET_PAGE_SIZE) {
+        /*
+         * Most targets have only a few mmu_idx.  In the case where
+         * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
+         * allocating memory for this operation.
+         */
+        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
+                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
     } else {
-        tlb_flush_page_by_mmuidx_async_work(
-            cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+        TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
+
+        /* Otherwise allocate a structure, freed by the worker.  */
+        d->addr = addr;
+        d->idxmap = idxmap;
+        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
+                         RUN_ON_CPU_HOST_PTR(d));
     }
 }
 
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                        uint16_t idxmap)
 {
-    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
-    target_ulong addr_and_mmu_idx;
-
     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
 
     /* This should already be page aligned */
-    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
-    addr_and_mmu_idx |= idxmap;
+    addr &= TARGET_PAGE_MASK;
 
-    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
-    fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+    /*
+     * Allocate memory to hold addr+idxmap only when needed.
+     * See tlb_flush_page_by_mmuidx for details.
+     */
+    if (idxmap < TARGET_PAGE_SIZE) {
+        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
+                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
+    } else {
+        CPUState *dst_cpu;
+
+        /* Allocate a separate data block for each destination cpu.  */
+        CPU_FOREACH(dst_cpu) {
+            if (dst_cpu != src_cpu) {
+                TLBFlushPageByMMUIdxData *d
+                    = g_new(TLBFlushPageByMMUIdxData, 1);
+
+                d->addr = addr;
+                d->idxmap = idxmap;
+                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
+                                 RUN_ON_CPU_HOST_PTR(d));
+            }
+        }
+    }
+
+    tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
 }
 
 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                               target_ulong addr,
                                               uint16_t idxmap)
 {
-    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
-    target_ulong addr_and_mmu_idx;
-
     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
 
     /* This should already be page aligned */
-    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
-    addr_and_mmu_idx |= idxmap;
+    addr &= TARGET_PAGE_MASK;
 
-    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
-    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+    /*
+     * Allocate memory to hold addr+idxmap only when needed.
+     * See tlb_flush_page_by_mmuidx for details.
+     */
+    if (idxmap < TARGET_PAGE_SIZE) {
+        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
+                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
+        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
+                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
+    } else {
+        CPUState *dst_cpu;
+        TLBFlushPageByMMUIdxData *d;
+
+        /* Allocate a separate data block for each destination cpu.  */
+        CPU_FOREACH(dst_cpu) {
+            if (dst_cpu != src_cpu) {
+                d = g_new(TLBFlushPageByMMUIdxData, 1);
+                d->addr = addr;
+                d->idxmap = idxmap;
+                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
+                                 RUN_ON_CPU_HOST_PTR(d));
+            }
+        }
+
+        d = g_new(TLBFlushPageByMMUIdxData, 1);
+        d->addr = addr;
+        d->idxmap = idxmap;
+        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
+                              RUN_ON_CPU_HOST_PTR(d));
+    }
 }
 
 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
-- 
2.20.1

From: Carlos Santos <casantos@redhat.com>

uClibc defines _SC_LEVEL1_ICACHE_LINESIZE and _SC_LEVEL1_DCACHE_LINESIZE
but the corresponding sysconf calls return -1, which is a valid result,
meaning that the limit is indeterminate.

Handle this situation using the fallback values instead of crashing due
to an assertion failure.

Signed-off-by: Carlos Santos <casantos@redhat.com>
Message-Id: <20191017123713.30192-1-casantos@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/cacheinfo.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index XXXXXXX..XXXXXXX 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -XXX,XX +XXX,XX @@ static void sys_cache_info(int *isize, int *dsize)
 static void sys_cache_info(int *isize, int *dsize)
 {
 # ifdef _SC_LEVEL1_ICACHE_LINESIZE
-    *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    if (tmp_isize > 0) {
+        *isize = tmp_isize;
+    }
 # endif
 # ifdef _SC_LEVEL1_DCACHE_LINESIZE
-    *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    if (tmp_dsize > 0) {
+        *dsize = tmp_dsize;
+    }
 # endif
 }
 #endif /* sys_cache_info */
-- 
2.20.1

The accel_initialised variable no longer has any setters.

Fixes: 6f6e1698a68c
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
 {
     const char *accel;
     char **accel_list, **tmp;
-    bool accel_initialised = false;
     bool init_failed = false;
 
     qemu_opts_foreach(qemu_find_opts("icount"),
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
 
         accel_list = g_strsplit(accel, ":", 0);
 
-        for (tmp = accel_list; !accel_initialised && tmp && *tmp; tmp++) {
+        for (tmp = accel_list; tmp && *tmp; tmp++) {
             /*
              * Filter invalid accelerators here, to prevent obscenities
              * such as "-machine accel=tcg,,thread=single".
-- 
2.20.1

The accel_list and tmp variables are only used when manufacturing
-machine accel, options based on -accel.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
 static void configure_accelerators(const char *progname)
 {
     const char *accel;
-    char **accel_list, **tmp;
     bool init_failed = false;
 
     qemu_opts_foreach(qemu_find_opts("icount"),
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
 
     accel = qemu_opt_get(qemu_get_machine_opts(), "accel");
     if (QTAILQ_EMPTY(&qemu_accel_opts.head)) {
+        char **accel_list, **tmp;
+
         if (accel == NULL) {
             /* Select the default accelerator */
             if (!accel_find("tcg") && !accel_find("kvm")) {
-- 
2.20.1

By choosing "tcg:kvm" when kvm is not enabled, we generate
an incorrect warning: "invalid accelerator kvm".

At the same time, use g_str_has_suffix rather than open-coding
the same operation.

Presumably the inverse is also true with --disable-tcg.

Fixes: 28a0961757fc
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
 
         if (accel == NULL) {
             /* Select the default accelerator */
-            if (!accel_find("tcg") && !accel_find("kvm")) {
-                error_report("No accelerator selected and"
-                             " no default accelerator available");
-                exit(1);
-            } else {
-                int pnlen = strlen(progname);
-                if (pnlen >= 3 && g_str_equal(&progname[pnlen - 3], "kvm")) {
+            bool have_tcg = accel_find("tcg");
+            bool have_kvm = accel_find("kvm");
+
+            if (have_tcg && have_kvm) {
+                if (g_str_has_suffix(progname, "kvm")) {
                     /* If the program name ends with "kvm", we prefer KVM */
                     accel = "kvm:tcg";
                 } else {
                     accel = "tcg:kvm";
                 }
+            } else if (have_kvm) {
+                accel = "kvm";
+            } else if (have_tcg) {
+                accel = "tcg";
+            } else {
+                error_report("No accelerator selected and"
+                             " no default accelerator available");
+                exit(1);
             }
         }
-
         accel_list = g_strsplit(accel, ":", 0);
 
         for (tmp = accel_list; *tmp; tmp++) {
-- 
2.20.1

There is only one caller for tlb_table_flush_by_mmuidx.  Place
the result at the earlier line number, due to an expected user
in the near future.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
     }
 }
 
-static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
 {
     tlb_mmu_resize_locked(env, mmu_idx);
-    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
     env_tlb(env)->d[mmu_idx].n_used_entries = 0;
+    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
+    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
+    env_tlb(env)->d[mmu_idx].vindex = 0;
+    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
+    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
+           sizeof(env_tlb(env)->d[0].vtable));
 }
 
 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
@@ -XXX,XX +XXX,XX @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
     *pelide = elide;
 }
 
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
-{
-    tlb_table_flush_by_mmuidx(env, mmu_idx);
-    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
-    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
-    env_tlb(env)->d[mmu_idx].vindex = 0;
-    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
-           sizeof(env_tlb(env)->d[0].vtable));
-}
-
 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 {
     CPUArchState *env = cpu->env_ptr;
-- 
2.20.1

There are no users of this function outside cputlb.c,
and its interface will change in the next patch.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu_ldst.h | 5 -----
 accel/tcg/cputlb.c      | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -XXX,XX +XXX,XX @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
     return (addr >> TARGET_PAGE_BITS) & size_mask;
 }
 
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
-{
-    return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
-}
-
 /* Find the TLB entry corresponding to the mmu_idx + address pair.  */
 static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
                                      target_ulong addr)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
 
+static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
+{
+    return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
+}
+
 static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
 {
     return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
-- 
2.20.1

We do not need the entire CPUArchState to compute tlb_n_entries and
sizeof_tlb; a pointer to the CPUTLBDescFast is sufficient.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
 
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
+static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
 {
-    return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
+    return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
 }
 
-static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
+static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
 {
-    return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
+    return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
 }
 
 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
 static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
 {
     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
-    size_t old_size = tlb_n_entries(env, mmu_idx);
+    size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
     size_t rate;
     size_t new_size = old_size;
     int64_t now = get_clock_realtime();
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
     env_tlb(env)->d[mmu_idx].large_page_addr = -1;
     env_tlb(env)->d[mmu_idx].large_page_mask = -1;
     env_tlb(env)->d[mmu_idx].vindex = 0;
-    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
+    memset(env_tlb(env)->f[mmu_idx].table, -1,
+           sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
     memset(env_tlb(env)->d[mmu_idx].vtable, -1,
            sizeof(env_tlb(env)->d[0].vtable));
 }
@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
     qemu_spin_lock(&env_tlb(env)->c.lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
         unsigned int i;
-        unsigned int n = tlb_n_entries(env, mmu_idx);
+        unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
 
         for (i = 0; i < n; i++) {
             tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
-- 
2.20.1

No functional change, but the smaller expressions make
the code easier to read.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
 
 /**
  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
- * @env: CPU that owns the TLB
- * @mmu_idx: MMU index of the TLB
+ * @desc: The CPUTLBDesc portion of the TLB
+ * @fast: The CPUTLBDescFast portion of the same TLB
  *
  * Called with tlb_lock_held.
  *
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
  * high), since otherwise we are likely to have a significant amount of
  * conflict misses.
  */
-static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
 {
-    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
-    size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
+    size_t old_size = tlb_n_entries(fast);
     size_t rate;
     size_t new_size = old_size;
     int64_t now = get_clock_realtime();
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
         return;
     }
 
-    g_free(env_tlb(env)->f[mmu_idx].table);
-    g_free(env_tlb(env)->d[mmu_idx].iotlb);
+    g_free(fast->table);
+    g_free(desc->iotlb);
 
     tlb_window_reset(desc, now, 0);
     /* desc->n_used_entries is cleared by the caller */
-    env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
-    env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
-    env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
+    fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
+    fast->table = g_try_new(CPUTLBEntry, new_size);
+    desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
+
     /*
      * If the allocations fail, try smaller sizes. We just freed some
      * memory, so going back to half of new_size has a good chance of working.
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
      * allocations to fail though, so we progressively reduce the allocation
      * size, aborting if we cannot even allocate the smallest TLB we support.
      */
-    while (env_tlb(env)->f[mmu_idx].table == NULL ||
-           env_tlb(env)->d[mmu_idx].iotlb == NULL) {
+    while (fast->table == NULL || desc->iotlb == NULL) {
         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
             error_report("%s: %s", __func__, strerror(errno));
             abort();
         }
         new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
-        env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
+        fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
 
-        g_free(env_tlb(env)->f[mmu_idx].table);
-        g_free(env_tlb(env)->d[mmu_idx].iotlb);
-        env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
-        env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
+        g_free(fast->table);
+        g_free(desc->iotlb);
+        fast->table = g_try_new(CPUTLBEntry, new_size);
+        desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
     }
 }
 
 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
 {
-    tlb_mmu_resize_locked(env, mmu_idx);
+    tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
     env_tlb(env)->d[mmu_idx].n_used_entries = 0;
     env_tlb(env)->d[mmu_idx].large_page_addr = -1;
     env_tlb(env)->d[mmu_idx].large_page_mask = -1;
-- 
2.20.1

No functional change, but the smaller expressions make
the code easier to read.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
 
 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
 {
-    tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
-    env_tlb(env)->d[mmu_idx].n_used_entries = 0;
-    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
-    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
-    env_tlb(env)->d[mmu_idx].vindex = 0;
-    memset(env_tlb(env)->f[mmu_idx].table, -1,
-           sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
-    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
-           sizeof(env_tlb(env)->d[0].vtable));
+    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
+    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
+
+    tlb_mmu_resize_locked(desc, fast);
+    desc->n_used_entries = 0;
+    desc->large_page_addr = -1;
+    desc->large_page_mask = -1;
+    desc->vindex = 0;
+    memset(fast->table, -1, sizeof_tlb(fast));
+    memset(desc->vtable, -1, sizeof(desc->vtable));
 }
 
 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
-- 
2.20.1

We will want to be able to flush a tlb without resizing.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
     }
 }
 
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
+static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
 {
-    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
-    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
-
-    tlb_mmu_resize_locked(desc, fast);
     desc->n_used_entries = 0;
     desc->large_page_addr = -1;
     desc->large_page_mask = -1;
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
     memset(desc->vtable, -1, sizeof(desc->vtable));
 }
 
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
+{
+    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
+    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
+
+    tlb_mmu_resize_locked(desc, fast);
+    tlb_mmu_flush_locked(desc, fast);
+}
+
 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
 {
     env_tlb(env)->d[mmu_idx].n_used_entries++;
-- 
2.20.1

Merge tlb_dyn_init into its only caller, tlb_init, but at the same time
split out tlb_mmu_init to initialize a single tlb (one CPUTLBDesc and
CPUTLBDescFast pair).

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
     desc->window_max_entries = max_entries;
 }
 
-static void tlb_dyn_init(CPUArchState *env)
-{
-    int i;
-
-    for (i = 0; i < NB_MMU_MODES; i++) {
-        CPUTLBDesc *desc = &env_tlb(env)->d[i];
-        size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
-
-        tlb_window_reset(desc, get_clock_realtime(), 0);
-        desc->n_used_entries = 0;
-        env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
-        env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
-        env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
-    }
-}
-
 /**
  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
  * @desc: The CPUTLBDesc portion of the TLB
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
     tlb_mmu_flush_locked(desc, fast);
 }
 
+static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
+{
+    size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
+
+    tlb_window_reset(desc, now, 0);
+    desc->n_used_entries = 0;
+    fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
+    fast->table = g_new(CPUTLBEntry, n_entries);
+    desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
+}
+
 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
 {
     env_tlb(env)->d[mmu_idx].n_used_entries++;
@@ -XXX,XX +XXX,XX @@ static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
 void tlb_init(CPUState *cpu)
 {
     CPUArchState *env = cpu->env_ptr;
+    int64_t now = get_clock_realtime();
+    int i;
 
     qemu_spin_init(&env_tlb(env)->c.lock);
 
     /* Ensure that cpu_reset performs a full flush.  */
     env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
 
-    tlb_dyn_init(env);
+    for (i = 0; i < NB_MMU_MODES; i++) {
+        tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
+    }
 }
 
 /* flush_all_helper: run fn across all cpus
-- 
2.20.1

There's little point in leaving these data structures half initialized,
and relying on a flush to be done during reset.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
     fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
     fast->table = g_new(CPUTLBEntry, n_entries);
     desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
+    tlb_mmu_flush_locked(desc, fast);
 }
 
 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
@@ -XXX,XX +XXX,XX @@ void tlb_init(CPUState *cpu)
 
     qemu_spin_init(&env_tlb(env)->c.lock);
 
-    /* Ensure that cpu_reset performs a full flush.  */
-    env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
+    /* All tlbs are initialized flushed. */
+    env_tlb(env)->c.dirty = 0;
 
     for (i = 0; i < NB_MMU_MODES; i++) {
         tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
-- 
2.20.1

Do not call get_clock_realtime() in tlb_mmu_resize_locked,
but hoist the call outside of any loop over a set of tlbs.  There
are only two (indirect) callers, tlb_flush_by_mmuidx_async_work
and tlb_flush_page_locked, so this is not onerous.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
  * high), since otherwise we are likely to have a significant amount of
  * conflict misses.
  */
-static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
+                                  int64_t now)
 {
     size_t old_size = tlb_n_entries(fast);
     size_t rate;
     size_t new_size = old_size;
-    int64_t now = get_clock_realtime();
     int64_t window_len_ms = 100;
     int64_t window_len_ns = window_len_ms * 1000 * 1000;
     bool window_expired = now > desc->window_begin_ns + window_len_ns;
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
     memset(desc->vtable, -1, sizeof(desc->vtable));
 }
 
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
+                                        int64_t now)
 {
     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
     CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
 
-    tlb_mmu_resize_locked(desc, fast);
+    tlb_mmu_resize_locked(desc, fast, now);
     tlb_mmu_flush_locked(desc, fast);
 }
 
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
     CPUArchState *env = cpu->env_ptr;
     uint16_t asked = data.host_int;
     uint16_t all_dirty, work, to_clean;
+    int64_t now = get_clock_realtime();
 
     assert_cpu_is_self(cpu);
 
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 
     for (work = to_clean; work != 0; work &= work - 1) {
         int mmu_idx = ctz32(work);
-        tlb_flush_one_mmuidx_locked(env, mmu_idx);
+        tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
     }
 
     qemu_spin_unlock(&env_tlb(env)->c.lock);
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
         tlb_debug("forcing full flush midx %d ("
                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                   midx, lp_addr, lp_mask);
-        tlb_flush_one_mmuidx_locked(env, midx);
+        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
     } else {
         if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
             tlb_n_used_entries_dec(env, midx);
-- 
2.20.1

The following changes since commit 2ecfc0657afa5d29a373271b342f704a1a3c6737:

Merge remote-tracking branch 'remotes/armbru/tags/pull-misc-2020-12-10' into staging (2020-12-10 17:01:05 +0000)

are available in the Git repository at:

https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20201210

for you to fetch changes up to 9e2658d62ebc23efe7df43fc0e306f129510d874:

accel/tcg: rename tcg-cpus functions to match module name (2020-12-10 17:44:10 -0600)

----------------------------------------------------------------
Split CpusAccel for tcg variants

----------------------------------------------------------------
Claudio Fontana (3):
      accel/tcg: split CpusAccel into three TCG variants
      accel/tcg: split tcg_start_vcpu_thread
      accel/tcg: rename tcg-cpus functions to match module name

accel/tcg/tcg-cpus-icount.h |  17 ++
 accel/tcg/tcg-cpus-rr.h     |  21 ++
 accel/tcg/tcg-cpus.h        |  12 +-
 accel/tcg/tcg-all.c         |  13 +-
 accel/tcg/tcg-cpus-icount.c | 147 +++++++++++++
 accel/tcg/tcg-cpus-mttcg.c  | 140 ++++++++++++
 accel/tcg/tcg-cpus-rr.c     | 305 ++++++++++++++++++++++++++
 accel/tcg/tcg-cpus.c        | 506 +-------------------------------------------
 softmmu/icount.c            |   2 +-
 accel/tcg/meson.build       |   9 +-
 10 files changed, 670 insertions(+), 502 deletions(-)
 create mode 100644 accel/tcg/tcg-cpus-icount.h
 create mode 100644 accel/tcg/tcg-cpus-rr.h
 create mode 100644 accel/tcg/tcg-cpus-icount.c
 create mode 100644 accel/tcg/tcg-cpus-mttcg.c
 create mode 100644 accel/tcg/tcg-cpus-rr.c

From: Claudio Fontana <cfontana@suse.de>

split up the CpusAccel tcg_cpus into three TCG variants:

tcg_cpus_rr (single threaded, round robin cpus)
tcg_cpus_icount (same as rr, but with instruction counting enabled)
tcg_cpus_mttcg (multi-threaded cpus)

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Claudio Fontana <cfontana@suse.de>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20201015143217.29337-2-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-cpus-icount.h |  17 ++
 accel/tcg/tcg-cpus-mttcg.h  |  21 ++
 accel/tcg/tcg-cpus-rr.h     |  20 ++
 accel/tcg/tcg-cpus.h        |  13 +-
 accel/tcg/tcg-all.c         |   8 +-
 accel/tcg/tcg-cpus-icount.c | 147 +++++++++++
 accel/tcg/tcg-cpus-mttcg.c  | 117 +++++++++
 accel/tcg/tcg-cpus-rr.c     | 270 ++++++++++++++++++++
 accel/tcg/tcg-cpus.c        | 484 ++----------------------------------
 softmmu/icount.c            |   2 +-
 accel/tcg/meson.build       |   9 +-
 11 files changed, 646 insertions(+), 462 deletions(-)
 create mode 100644 accel/tcg/tcg-cpus-icount.h
 create mode 100644 accel/tcg/tcg-cpus-mttcg.h
 create mode 100644 accel/tcg/tcg-cpus-rr.h
 create mode 100644 accel/tcg/tcg-cpus-icount.c
 create mode 100644 accel/tcg/tcg-cpus-mttcg.c
 create mode 100644 accel/tcg/tcg-cpus-rr.c

diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-icount.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation using instruction counting
+ *
+ * Copyright 2020 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TCG_CPUS_ICOUNT_H
+#define TCG_CPUS_ICOUNT_H
+
+void handle_icount_deadline(void);
+void prepare_icount_for_run(CPUState *cpu);
+void process_icount_data(CPUState *cpu);
+
+#endif /* TCG_CPUS_ICOUNT_H */
diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-mttcg.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Multi Threaded vCPUs implementation
+ *
+ * Copyright 2020 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TCG_CPUS_MTTCG_H
+#define TCG_CPUS_MTTCG_H
+
+/*
+ * In the multi-threaded case each vCPU has its own thread. The TLS
+ * variable current_cpu can be used deep in the code to find the
+ * current CPUState for a given thread.
+ */
+
+void *tcg_cpu_thread_fn(void *arg);
+
+#endif /* TCG_CPUS_MTTCG_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation
+ *
+ * Copyright 2020 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TCG_CPUS_RR_H
+#define TCG_CPUS_RR_H
+
+#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
+
+/* Kick all RR vCPUs. */
+void qemu_cpu_kick_rr_cpus(CPUState *unused);
+
+void *tcg_rr_cpu_thread_fn(void *arg);
+
+#endif /* TCG_CPUS_RR_H */
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@
 /*
- * Accelerator CPUS Interface
+ * QEMU TCG vCPU common functionality
+ *
+ * Functionality common to all TCG vcpu variants: mttcg, rr and icount.
  *
  * Copyright 2020 SUSE LLC
  *
@@ -XXX,XX +XXX,XX @@
 
 #include "sysemu/cpus.h"
 
-extern const CpusAccel tcg_cpus;
+extern const CpusAccel tcg_cpus_mttcg;
+extern const CpusAccel tcg_cpus_icount;
+extern const CpusAccel tcg_cpus_rr;
+
+void tcg_start_vcpu_thread(CPUState *cpu);
+void qemu_tcg_destroy_vcpu(CPUState *cpu);
+int tcg_cpu_exec(CPUState *cpu);
+void tcg_handle_interrupt(CPUState *cpu, int mask);
 
 #endif /* TCG_CPUS_H */
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
 
     tcg_exec_init(s->tb_size * 1024 * 1024);
     mttcg_enabled = s->mttcg_enabled;
-    cpus_register_accel(&tcg_cpus);
 
+    if (mttcg_enabled) {
+        cpus_register_accel(&tcg_cpus_mttcg);
+    } else if (icount_enabled()) {
+        cpus_register_accel(&tcg_cpus_icount);
+    } else {
+        cpus_register_accel(&tcg_cpus_rr);
+    }
     return 0;
 }
 
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation using instruction counting
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/tcg.h"
+#include "sysemu/replay.h"
+#include "qemu/main-loop.h"
+#include "qemu/guest-random.h"
+#include "exec/exec-all.h"
+#include "hw/boards.h"
+
+#include "tcg-cpus.h"
+#include "tcg-cpus-icount.h"
+#include "tcg-cpus-rr.h"
+
+static int64_t tcg_get_icount_limit(void)
+{
+    int64_t deadline;
+
+    if (replay_mode != REPLAY_MODE_PLAY) {
+        /*
+         * Include all the timers, because they may need an attention.
+         * Too long CPU execution may create unnecessary delay in UI.
+         */
+        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                              QEMU_TIMER_ATTR_ALL);
+        /* Check realtime timers, because they help with input processing */
+        deadline = qemu_soonest_timeout(deadline,
+                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
+                                           QEMU_TIMER_ATTR_ALL));
+
+        /*
+         * Maintain prior (possibly buggy) behaviour where if no deadline
+         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
+         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
+         * nanoseconds.
+         */
+        if ((deadline < 0) || (deadline > INT32_MAX)) {
+            deadline = INT32_MAX;
+        }
+
+        return icount_round(deadline);
+    } else {
+        return replay_get_instructions();
+    }
+}
+
+static void notify_aio_contexts(void)
+{
+    /* Wake up other AioContexts.  */
+    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+    qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
+}
+
+void handle_icount_deadline(void)
+{
+    assert(qemu_in_vcpu_thread());
+    int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                                  QEMU_TIMER_ATTR_ALL);
+
+    if (deadline == 0) {
+        notify_aio_contexts();
+    }
+}
+
+void prepare_icount_for_run(CPUState *cpu)
+{
+    int insns_left;
+
+    /*
+     * These should always be cleared by process_icount_data after
+     * each vCPU execution. However u16.high can be raised
+     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
+     */
+    g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
+    g_assert(cpu->icount_extra == 0);
+
+    cpu->icount_budget = tcg_get_icount_limit();
+    insns_left = MIN(0xffff, cpu->icount_budget);
+    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
+    cpu->icount_extra = cpu->icount_budget - insns_left;
+
+    replay_mutex_lock();
+
+    if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
+        notify_aio_contexts();
+    }
+}
+
+void process_icount_data(CPUState *cpu)
+{
+    /* Account for executed instructions */
+    icount_update(cpu);
+
+    /* Reset the counters */
+    cpu_neg(cpu)->icount_decr.u16.low = 0;
+    cpu->icount_extra = 0;
+    cpu->icount_budget = 0;
+
+    replay_account_executed_instructions();
+
+    replay_mutex_unlock();
+}
+
+static void icount_handle_interrupt(CPUState *cpu, int mask)
+{
+    int old_mask = cpu->interrupt_request;
+
+    tcg_handle_interrupt(cpu, mask);
+    if (qemu_cpu_is_self(cpu) &&
+        !cpu->can_do_io
+        && (mask & ~old_mask) != 0) {
+        cpu_abort(cpu, "Raised interrupt while not in I/O function");
+    }
+}
+
+const CpusAccel tcg_cpus_icount = {
+    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+
+    .handle_interrupt = icount_handle_interrupt,
+    .get_virtual_clock = icount_get,
+    .get_elapsed_ticks = icount_get,
+};
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Multi Threaded vCPUs implementation
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/tcg.h"
+#include "sysemu/replay.h"
+#include "qemu/main-loop.h"
+#include "qemu/guest-random.h"
+#include "exec/exec-all.h"
+#include "hw/boards.h"
+
+#include "tcg-cpus.h"
+#include "tcg-cpus-mttcg.h"
+
+/*
+ * In the multi-threaded case each vCPU has its own thread. The TLS
+ * variable current_cpu can be used deep in the code to find the
+ * current CPUState for a given thread.
+ */
+
+void *tcg_cpu_thread_fn(void *arg)
+{
+    CPUState *cpu = arg;
+
+    assert(tcg_enabled());
+    g_assert(!icount_enabled());
+
+    rcu_register_thread();
+    tcg_register_thread();
+
+    qemu_mutex_lock_iothread();
+    qemu_thread_get_self(cpu->thread);
+
+    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;
+    current_cpu = cpu;
+    cpu_thread_signal_created(cpu);
+    qemu_guest_random_seed_thread_part2(cpu->random_seed);
+
+    /* process any pending work */
+    cpu->exit_request = 1;
+
+    do {
+        if (cpu_can_run(cpu)) {
+            int r;
+            qemu_mutex_unlock_iothread();
+            r = tcg_cpu_exec(cpu);
+            qemu_mutex_lock_iothread();
+            switch (r) {
+            case EXCP_DEBUG:
+                cpu_handle_guest_debug(cpu);
+                break;
+            case EXCP_HALTED:
+                /*
+                 * during start-up the vCPU is reset and the thread is
+                 * kicked several times. If we don't ensure we go back
+                 * to sleep in the halted state we won't cleanly
+                 * start-up when the vCPU is enabled.
+                 *
+                 * cpu->halted should ensure we sleep in wait_io_event
+                 */
+                g_assert(cpu->halted);
+                break;
+            case EXCP_ATOMIC:
+                qemu_mutex_unlock_iothread();
+                cpu_exec_step_atomic(cpu);
+                qemu_mutex_lock_iothread();
+            default:
+                /* Ignore everything else? */
+                break;
+            }
+        }
+
+        qatomic_mb_set(&cpu->exit_request, 0);
+        qemu_wait_io_event(cpu);
+    } while (!cpu->unplug || cpu_can_run(cpu));
+
+    qemu_tcg_destroy_vcpu(cpu);
+    qemu_mutex_unlock_iothread();
+    rcu_unregister_thread();
+    return NULL;
+}
+
+static void mttcg_kick_vcpu_thread(CPUState *cpu)
+{
+    cpu_exit(cpu);
+}
+
+const CpusAccel tcg_cpus_mttcg = {
+    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .kick_vcpu_thread = mttcg_kick_vcpu_thread,
+
+    .handle_interrupt = tcg_handle_interrupt,
+};
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU TCG Single Threaded vCPUs implementation
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/tcg.h"
+#include "sysemu/replay.h"
+#include "qemu/main-loop.h"
+#include "qemu/guest-random.h"
+#include "exec/exec-all.h"
+#include "hw/boards.h"
+
+#include "tcg-cpus.h"
+#include "tcg-cpus-rr.h"
+#include "tcg-cpus-icount.h"
+
+/*
+ * Kick all RR vCPUs.
+ *
+ * The CPUState argument is ignored (hence "unused"): cpu_exit() is called
+ * on every vCPU visited by CPU_FOREACH. The parameter is still present so
+ * the function can be assigned to the .kick_vcpu_thread hook of tcg_cpus_rr.
+ */
+void qemu_cpu_kick_rr_cpus(CPUState *unused)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        cpu_exit(cpu);
+    };
+}
+
+/*
+ * TCG vCPU kick timer
+ *
+ * The kick timer is responsible for moving single threaded vCPU
+ * emulation on to the next vCPU. If more than one vCPU is running a
+ * timer event will force a cpu->exit so the next vCPU can get
+ * scheduled.
+ *
+ * The timer is removed if all vCPUs are idle and restarted again once
+ * idleness is complete.
+ */
+
+static QEMUTimer *tcg_kick_vcpu_timer; /* set by start_tcg_kick_timer() */
+static CPUState *tcg_current_rr_cpu; /* vCPU the RR loop is on, else NULL */
+
+#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10) /* 1/10 s, in ns */
+
+static inline int64_t qemu_tcg_next_kick(void)
+{ /* deadline of the next kick: QEMU_CLOCK_VIRTUAL now plus one period */
+    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
+}
+
+/*
+ * Kick the currently round-robin scheduled vCPU to next.
+ *
+ * tcg_current_rr_cpu is sampled and, if it is not NULL, cpu_exit() is
+ * called on it. The variable is then read again: if it changed in the
+ * meantime the loop repeats, so the loop only ends once the value read
+ * after the kick matches the vCPU that was kicked (or both are NULL).
+ */
+static void qemu_cpu_kick_rr_next_cpu(void)
+{
+    CPUState *cpu;
+    do {
+        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
+        if (cpu) {
+            cpu_exit(cpu);
+        }
+    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
+}
+
+static void kick_tcg_thread(void *opaque) /* timer callback, opaque unused */
+{
+    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); /* re-arm first */
+    qemu_cpu_kick_rr_next_cpu(); /* then kick the vCPU now being run */
+}
+
+static void start_tcg_kick_timer(void)
+{
+    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) { /* needs 2+ vCPUs */
+        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                           kick_tcg_thread, NULL);
+    }
+    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
+        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); /* arm it */
+    }
+}
+
+static void stop_tcg_kick_timer(void)
+{
+    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
+        timer_del(tcg_kick_vcpu_timer); /* cancelled only; pointer is kept */
+    }
+}
+
+static void qemu_tcg_rr_wait_io_event(void)
+{
+    CPUState *cpu;
+
+    while (all_cpu_threads_idle()) { /* until some vCPU is not idle */
+        stop_tcg_kick_timer(); /* no kicks while all are idle */
+        qemu_cond_wait_iothread(first_cpu->halt_cond); /* shared condvar */
+    }
+
+    start_tcg_kick_timer(); /* no longer idle: resume kicks */
+
+    CPU_FOREACH(cpu) {
+        qemu_wait_io_event_common(cpu); /* done for every vCPU */
+    }
+}
+
+/*
+ * Destroy any remaining vCPUs which have been unplugged and have
+ * finished running.
+ *
+ * Only the first vCPU found with cpu->unplug set and !cpu_can_run() is
+ * handled per call: the loop breaks right after qemu_tcg_destroy_vcpu().
+ */
+static void deal_with_unplugged_cpus(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        if (cpu->unplug && !cpu_can_run(cpu)) {
+            qemu_tcg_destroy_vcpu(cpu);
+            break;
+        }
+    }
+}
+
+/*
+ * In the single-threaded case each vCPU is simulated in turn. If
+ * there is more than a single vCPU we create a simple timer to kick
+ * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
+ * This is done explicitly rather than relying on side-effects
+ * elsewhere.
+ *
+ * arg is the CPUState the thread was created with; once start-up is done
+ * the local "cpu" is reused as the round-robin cursor over all vCPUs.
+ * The outer while (1) loop contains no exit, so the statements that
+ * follow it are never reached.
+ */
+
+void *tcg_rr_cpu_thread_fn(void *arg)
+{
+    CPUState *cpu = arg;
+
+    assert(tcg_enabled());
+    rcu_register_thread();
+    tcg_register_thread();
+
+    qemu_mutex_lock_iothread();
+    qemu_thread_get_self(cpu->thread);
+
+    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;
+    cpu_thread_signal_created(cpu);
+    qemu_guest_random_seed_thread_part2(cpu->random_seed);
+
+    /* wait for initial kick-off after machine start */
+    while (first_cpu->stopped) {
+        qemu_cond_wait_iothread(first_cpu->halt_cond);
+
+        /* process any pending work, for every vCPU (this overwrites cpu) */
+        CPU_FOREACH(cpu) {
+            current_cpu = cpu;
+            qemu_wait_io_event_common(cpu);
+        }
+    }
+
+    start_tcg_kick_timer();
+
+    cpu = first_cpu;
+
+    /*
+     * process any pending work: with exit_request set, the inner execution
+     * loop below is skipped on the first pass and control goes straight to
+     * qemu_tcg_rr_wait_io_event()
+     */
+    cpu->exit_request = 1;
+
+    while (1) {
+        qemu_mutex_unlock_iothread();
+        replay_mutex_lock(); /* taken while the iothread lock is released */
+        qemu_mutex_lock_iothread();
+
+        if (icount_enabled()) {
+            /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
+            icount_account_warp_timer();
+            /*
+             * Run the timers here.  This is much more efficient than
+             * waking up the I/O thread and waiting for completion.
+             */
+            handle_icount_deadline();
+        }
+
+        replay_mutex_unlock();
+
+        if (!cpu) { /* CPU_NEXT() returned NULL on the previous pass */
+            cpu = first_cpu;
+        }
+
+        while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
+
+            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
+            current_cpu = cpu;
+
+            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
+                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
+
+            if (cpu_can_run(cpu)) {
+                int r;
+
+                qemu_mutex_unlock_iothread(); /* exec runs unlocked */
+                if (icount_enabled()) {
+                    prepare_icount_for_run(cpu);
+                }
+                r = tcg_cpu_exec(cpu);
+                if (icount_enabled()) {
+                    process_icount_data(cpu);
+                }
+                qemu_mutex_lock_iothread();
+
+                if (r == EXCP_DEBUG) {
+                    cpu_handle_guest_debug(cpu);
+                    break;
+                } else if (r == EXCP_ATOMIC) {
+                    qemu_mutex_unlock_iothread();
+                    cpu_exec_step_atomic(cpu);
+                    qemu_mutex_lock_iothread();
+                    break;
+                }
+            } else if (cpu->stop) {
+                if (cpu->unplug) {
+                    cpu = CPU_NEXT(cpu); /* resume after the unplugged vCPU */
+                }
+                break;
+            }
+
+            cpu = CPU_NEXT(cpu);
+        } /* while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) */
+
+        /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
+        qatomic_set(&tcg_current_rr_cpu, NULL);
+
+        if (cpu && cpu->exit_request) {
+            qatomic_mb_set(&cpu->exit_request, 0);
+        }
+
+        if (icount_enabled() && all_cpu_threads_idle()) {
+            /*
+             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
+             * in the main_loop, wake it up in order to start the warp timer.
+             */
+            qemu_notify_event();
+        }
+
+        qemu_tcg_rr_wait_io_event();
+        deal_with_unplugged_cpus();
+    }
+
+    rcu_unregister_thread(); /* not reached: the loop above never exits */
+    return NULL;
+}
+
+const CpusAccel tcg_cpus_rr = { /* CpusAccel hooks for round-robin TCG */
+    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus, /* kicks every vCPU */
+
+    .handle_interrupt = tcg_handle_interrupt,
+};
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
 /*
- * QEMU System Emulator
+ * QEMU TCG vCPU common functionality
+ *
+ * Functionality common to all TCG vCPU variants: mttcg, rr and icount.
  *
  * Copyright (c) 2003-2008 Fabrice Bellard
  * Copyright (c) 2014 Red Hat Inc.
@@ -XXX,XX +XXX,XX @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
+#include "tcg-cpus-mttcg.h"
+#include "tcg-cpus-rr.h"
 
-/* Kick all RR vCPUs */
-static void qemu_cpu_kick_rr_cpus(void)
-{
-    CPUState *cpu;
+/* common functionality among all TCG variants */
 
-    CPU_FOREACH(cpu) {
-        cpu_exit(cpu);
-    };
-}
-
-static void tcg_kick_vcpu_thread(CPUState *cpu)
-{
-    if (qemu_tcg_mttcg_enabled()) {
-        cpu_exit(cpu);
-    } else {
-        qemu_cpu_kick_rr_cpus();
-    }
-}
-
-/*
- * TCG vCPU kick timer
- *
- * The kick timer is responsible for moving single threaded vCPU
- * emulation on to the next vCPU. If more than one vCPU is running a
- * timer event with force a cpu->exit so the next vCPU can get
- * scheduled.
- *
- * The timer is removed if all vCPUs are idle and restarted again once
- * idleness is complete.
- */
-
-static QEMUTimer *tcg_kick_vcpu_timer;
-static CPUState *tcg_current_rr_cpu;
-
-#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
-
-static inline int64_t qemu_tcg_next_kick(void)
-{
-    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
-}
-
-/* Kick the currently round-robin scheduled vCPU to next */
-static void qemu_cpu_kick_rr_next_cpu(void)
-{
-    CPUState *cpu;
-    do {
-        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
-        if (cpu) {
-            cpu_exit(cpu);
-        }
-    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
-}
-
-static void kick_tcg_thread(void *opaque)
-{
-    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
-    qemu_cpu_kick_rr_next_cpu();
-}
-
-static void start_tcg_kick_timer(void)
-{
-    assert(!mttcg_enabled);
-    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
-        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                           kick_tcg_thread, NULL);
-    }
-    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
-        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
-    }
-}
-
-static void stop_tcg_kick_timer(void)
-{
-    assert(!mttcg_enabled);
-    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
-        timer_del(tcg_kick_vcpu_timer);
-    }
-}
-
-static void qemu_tcg_destroy_vcpu(CPUState *cpu)
-{
-}
-
-static void qemu_tcg_rr_wait_io_event(void)
-{
-    CPUState *cpu;
-
-    while (all_cpu_threads_idle()) {
-        stop_tcg_kick_timer();
-        qemu_cond_wait_iothread(first_cpu->halt_cond);
-    }
-
-    start_tcg_kick_timer();
-
-    CPU_FOREACH(cpu) {
-        qemu_wait_io_event_common(cpu);
-    }
-}
-
-static int64_t tcg_get_icount_limit(void)
-{
-    int64_t deadline;
-
-    if (replay_mode != REPLAY_MODE_PLAY) {
-        /*
-         * Include all the timers, because they may need an attention.
-         * Too long CPU execution may create unnecessary delay in UI.
-         */
-        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
-                                              QEMU_TIMER_ATTR_ALL);
-        /* Check realtime timers, because they help with input processing */
-        deadline = qemu_soonest_timeout(deadline,
-                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
-                                           QEMU_TIMER_ATTR_ALL));
-
-        /*
-         * Maintain prior (possibly buggy) behaviour where if no deadline
-         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
-         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
-         * nanoseconds.
-         */
-        if ((deadline < 0) || (deadline > INT32_MAX)) {
-            deadline = INT32_MAX;
-        }
-
-        return icount_round(deadline);
-    } else {
-        return replay_get_instructions();
-    }
-}
-
-static void notify_aio_contexts(void)
-{
-    /* Wake up other AioContexts.  */
-    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-    qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
-}
-
-static void handle_icount_deadline(void)
-{
-    assert(qemu_in_vcpu_thread());
-    if (icount_enabled()) {
-        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
-                                                      QEMU_TIMER_ATTR_ALL);
-
-        if (deadline == 0) {
-            notify_aio_contexts();
-        }
-    }
-}
-
-static void prepare_icount_for_run(CPUState *cpu)
-{
-    if (icount_enabled()) {
-        int insns_left;
-
-        /*
-         * These should always be cleared by process_icount_data after
-         * each vCPU execution. However u16.high can be raised
-         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
-         */
-        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
-        g_assert(cpu->icount_extra == 0);
-
-        cpu->icount_budget = tcg_get_icount_limit();
-        insns_left = MIN(0xffff, cpu->icount_budget);
-        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
-        cpu->icount_extra = cpu->icount_budget - insns_left;
-
-        replay_mutex_lock();
-
-        if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
-            notify_aio_contexts();
-        }
-    }
-}
-
-static void process_icount_data(CPUState *cpu)
-{
-    if (icount_enabled()) {
-        /* Account for executed instructions */
-        icount_update(cpu);
-
-        /* Reset the counters */
-        cpu_neg(cpu)->icount_decr.u16.low = 0;
-        cpu->icount_extra = 0;
-        cpu->icount_budget = 0;
-
-        replay_account_executed_instructions();
-
-        replay_mutex_unlock();
-    }
-}
-
-static int tcg_cpu_exec(CPUState *cpu)
-{
-    int ret;
-#ifdef CONFIG_PROFILER
-    int64_t ti;
-#endif
-
-    assert(tcg_enabled());
-#ifdef CONFIG_PROFILER
-    ti = profile_getclock();
-#endif
-    cpu_exec_start(cpu);
-    ret = cpu_exec(cpu);
-    cpu_exec_end(cpu);
-#ifdef CONFIG_PROFILER
-    qatomic_set(&tcg_ctx->prof.cpu_exec_time,
-                tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
-#endif
-    return ret;
-}
-
-/*
- * Destroy any remaining vCPUs which have been unplugged and have
- * finished running
- */
-static void deal_with_unplugged_cpus(void)
-{
-    CPUState *cpu;
-
-    CPU_FOREACH(cpu) {
-        if (cpu->unplug && !cpu_can_run(cpu)) {
-            qemu_tcg_destroy_vcpu(cpu);
-            cpu_thread_signal_destroyed(cpu);
-            break;
-        }
-    }
-}
-
-/*
- * Single-threaded TCG
- *
- * In the single-threaded case each vCPU is simulated in turn. If
- * there is more than a single vCPU we create a simple timer to kick
- * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
- * This is done explicitly rather than relying on side-effects
- * elsewhere.
- */
-
-static void *tcg_rr_cpu_thread_fn(void *arg)
-{
-    CPUState *cpu = arg;
-
-    assert(tcg_enabled());
-    rcu_register_thread();
-    tcg_register_thread();
-
-    qemu_mutex_lock_iothread();
-    qemu_thread_get_self(cpu->thread);
-
-    cpu->thread_id = qemu_get_thread_id();
-    cpu->can_do_io = 1;
-    cpu_thread_signal_created(cpu);
-    qemu_guest_random_seed_thread_part2(cpu->random_seed);
-
-    /* wait for initial kick-off after machine start */
-    while (first_cpu->stopped) {
-        qemu_cond_wait_iothread(first_cpu->halt_cond);
-
-        /* process any pending work */
-        CPU_FOREACH(cpu) {
-            current_cpu = cpu;
-            qemu_wait_io_event_common(cpu);
-        }
-    }
-
-    start_tcg_kick_timer();
-
-    cpu = first_cpu;
-
-    /* process any pending work */
-    cpu->exit_request = 1;
-
-    while (1) {
-        qemu_mutex_unlock_iothread();
-        replay_mutex_lock();
-        qemu_mutex_lock_iothread();
-        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
-        icount_account_warp_timer();
-
-        /*
-         * Run the timers here.  This is much more efficient than
-         * waking up the I/O thread and waiting for completion.
-         */
-        handle_icount_deadline();
-
-        replay_mutex_unlock();
-
-        if (!cpu) {
-            cpu = first_cpu;
-        }
-
-        while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
-
-            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
-            current_cpu = cpu;
-
-            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
-                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
-
-            if (cpu_can_run(cpu)) {
-                int r;
-
-                qemu_mutex_unlock_iothread();
-                prepare_icount_for_run(cpu);
-
-                r = tcg_cpu_exec(cpu);
-
-                process_icount_data(cpu);
-                qemu_mutex_lock_iothread();
-
-                if (r == EXCP_DEBUG) {
-                    cpu_handle_guest_debug(cpu);
-                    break;
-                } else if (r == EXCP_ATOMIC) {
-                    qemu_mutex_unlock_iothread();
-                    cpu_exec_step_atomic(cpu);
-                    qemu_mutex_lock_iothread();
-                    break;
-                }
-            } else if (cpu->stop) {
-                if (cpu->unplug) {
-                    cpu = CPU_NEXT(cpu);
-                }
-                break;
-            }
-
-            cpu = CPU_NEXT(cpu);
-        } /* while (cpu && !cpu->exit_request).. */
-
-        /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
-        qatomic_set(&tcg_current_rr_cpu, NULL);
-
-        if (cpu && cpu->exit_request) {
-            qatomic_mb_set(&cpu->exit_request, 0);
-        }
-
-        if (icount_enabled() && all_cpu_threads_idle()) {
-            /*
-             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
-             * in the main_loop, wake it up in order to start the warp timer.
-             */
-            qemu_notify_event();
-        }
-
-        qemu_tcg_rr_wait_io_event();
-        deal_with_unplugged_cpus();
-    }
-
-    rcu_unregister_thread();
-    return NULL;
-}
-
-/*
- * Multi-threaded TCG
- *
- * In the multi-threaded case each vCPU has its own thread. The TLS
- * variable current_cpu can be used deep in the code to find the
- * current CPUState for a given thread.
- */
-
-static void *tcg_cpu_thread_fn(void *arg)
-{
-    CPUState *cpu = arg;
-
-    assert(tcg_enabled());
-    g_assert(!icount_enabled());
-
-    rcu_register_thread();
-    tcg_register_thread();
-
-    qemu_mutex_lock_iothread();
-    qemu_thread_get_self(cpu->thread);
-
-    cpu->thread_id = qemu_get_thread_id();
-    cpu->can_do_io = 1;
-    current_cpu = cpu;
-    cpu_thread_signal_created(cpu);
-    qemu_guest_random_seed_thread_part2(cpu->random_seed);
-
-    /* process any pending work */
-    cpu->exit_request = 1;
-
-    do {
-        if (cpu_can_run(cpu)) {
-            int r;
-            qemu_mutex_unlock_iothread();
-            r = tcg_cpu_exec(cpu);
-            qemu_mutex_lock_iothread();
-            switch (r) {
-            case EXCP_DEBUG:
-                cpu_handle_guest_debug(cpu);
-                break;
-            case EXCP_HALTED:
-                /*
-                 * during start-up the vCPU is reset and the thread is
-                 * kicked several times. If we don't ensure we go back
-                 * to sleep in the halted state we won't cleanly
-                 * start-up when the vCPU is enabled.
-                 *
-                 * cpu->halted should ensure we sleep in wait_io_event
-                 */
-                g_assert(cpu->halted);
-                break;
-            case EXCP_ATOMIC:
-                qemu_mutex_unlock_iothread();
-                cpu_exec_step_atomic(cpu);
-                qemu_mutex_lock_iothread();
-            default:
-                /* Ignore everything else? */
-                break;
-            }
-        }
-
-        qatomic_mb_set(&cpu->exit_request, 0);
-        qemu_wait_io_event(cpu);
-    } while (!cpu->unplug || cpu_can_run(cpu));
-
-    qemu_tcg_destroy_vcpu(cpu);
-    cpu_thread_signal_destroyed(cpu);
-    qemu_mutex_unlock_iothread();
-    rcu_unregister_thread();
-    return NULL;
-}
-
-static void tcg_start_vcpu_thread(CPUState *cpu)
+void tcg_start_vcpu_thread(CPUState *cpu)
 {
     char thread_name[VCPU_THREAD_NAME_SIZE];
     static QemuCond *single_tcg_halt_cond;
@@ -XXX,XX +XXX,XX @@ static void tcg_start_vcpu_thread(CPUState *cpu)
     }
 }
 
-static int64_t tcg_get_virtual_clock(void)
+void qemu_tcg_destroy_vcpu(CPUState *cpu)
 {
-    if (icount_enabled()) {
-        return icount_get();
-    }
-    return cpu_get_clock();
+    cpu_thread_signal_destroyed(cpu);
 }
 
-static int64_t tcg_get_elapsed_ticks(void)
+int tcg_cpu_exec(CPUState *cpu)
 {
-    if (icount_enabled()) {
-        return icount_get();
-    }
-    return cpu_get_ticks();
+    int ret;
+#ifdef CONFIG_PROFILER
+    int64_t ti;
+#endif
+    assert(tcg_enabled());
+#ifdef CONFIG_PROFILER
+    ti = profile_getclock();
+#endif
+    cpu_exec_start(cpu);
+    ret = cpu_exec(cpu);
+    cpu_exec_end(cpu);
+#ifdef CONFIG_PROFILER
+    qatomic_set(&tcg_ctx->prof.cpu_exec_time,
+                tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
+#endif
+    return ret;
 }
 
 /* mask must never be zero, except for A20 change call */
-static void tcg_handle_interrupt(CPUState *cpu, int mask)
+void tcg_handle_interrupt(CPUState *cpu, int mask)
 {
-    int old_mask;
     g_assert(qemu_mutex_iothread_locked());
 
-    old_mask = cpu->interrupt_request;
     cpu->interrupt_request |= mask;
 
     /*
@@ -XXX,XX +XXX,XX @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
         qemu_cpu_kick(cpu);
     } else {
         qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
-        if (icount_enabled() &&
-            !cpu->can_do_io
-            && (mask & ~old_mask) != 0) {
-            cpu_abort(cpu, "Raised interrupt while not in I/O function");
-        }
     }
 }
-
-const CpusAccel tcg_cpus = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
-    .kick_vcpu_thread = tcg_kick_vcpu_thread,
-
-    .handle_interrupt = tcg_handle_interrupt,
-
-    .get_virtual_clock = tcg_get_virtual_clock,
-    .get_elapsed_ticks = tcg_get_elapsed_ticks,
-};
diff --git a/softmmu/icount.c b/softmmu/icount.c
index XXXXXXX..XXXXXXX 100644
--- a/softmmu/icount.c
+++ b/softmmu/icount.c
@@ -XXX,XX +XXX,XX @@ void icount_start_warp_timer(void)
 
 void icount_account_warp_timer(void)
 {
-    if (!icount_enabled() || !icount_sleep) {
+    if (!icount_sleep) {
         return;
     }
 
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
 tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl])
 specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
 
-specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files('tcg-all.c', 'cputlb.c', 'tcg-cpus.c'))
+specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
+  'tcg-all.c',
+  'cputlb.c',
+  'tcg-cpus.c',
+  'tcg-cpus-mttcg.c',
+  'tcg-cpus-icount.c',
+  'tcg-cpus-rr.c'
+))
-- 
2.25.1

From: Claudio Fontana <cfontana@suse.de>

After the initial split into three TCG variants, we proceed to also
split tcg_start_vcpu_thread.

This time it is only split in two (mttcg_start_vcpu_thread and
rr_start_vcpu_thread), since the icount variant just uses the
round-robin function.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Claudio Fontana <cfontana@suse.de>
Message-Id: <20201015143217.29337-3-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-cpus-mttcg.h  | 21 --------------
 accel/tcg/tcg-cpus-rr.h     |  3 +-
 accel/tcg/tcg-cpus.h        |  1 -
 accel/tcg/tcg-all.c         |  5 ++++
 accel/tcg/tcg-cpus-icount.c |  2 +-
 accel/tcg/tcg-cpus-mttcg.c  | 29 +++++++++++++++++--
 accel/tcg/tcg-cpus-rr.c     | 39 +++++++++++++++++++++++--
 accel/tcg/tcg-cpus.c        | 58 -------------------------------------
 8 files changed, 71 insertions(+), 87 deletions(-)
 delete mode 100644 accel/tcg/tcg-cpus-mttcg.h

diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
deleted file mode 100644
index XXXXXXX..XXXXXXX
--- a/accel/tcg/tcg-cpus-mttcg.h
+++ /dev/null
@@ -XXX,XX +XXX,XX @@
-/*
- * QEMU TCG Multi Threaded vCPUs implementation
- *
- * Copyright 2020 SUSE LLC
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef TCG_CPUS_MTTCG_H
-#define TCG_CPUS_MTTCG_H
-
-/*
- * In the multi-threaded case each vCPU has its own thread. The TLS
- * variable current_cpu can be used deep in the code to find the
- * current CPUState for a given thread.
- */
-
-void *tcg_cpu_thread_fn(void *arg);
-
-#endif /* TCG_CPUS_MTTCG_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.h
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
 /* Kick all RR vCPUs. */
 void qemu_cpu_kick_rr_cpus(CPUState *unused);
 
-void *tcg_rr_cpu_thread_fn(void *arg);
+/* start the round robin vcpu thread */
+void rr_start_vcpu_thread(CPUState *cpu);
 
 #endif /* TCG_CPUS_RR_H */
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
 extern const CpusAccel tcg_cpus_icount;
 extern const CpusAccel tcg_cpus_rr;
 
-void tcg_start_vcpu_thread(CPUState *cpu);
 void qemu_tcg_destroy_vcpu(CPUState *cpu);
 int tcg_cpu_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
     tcg_exec_init(s->tb_size * 1024 * 1024);
     mttcg_enabled = s->mttcg_enabled;
 
+    /*
+     * Initialize TCG regions
+     */
+    tcg_region_init();
+
     if (mttcg_enabled) {
         cpus_register_accel(&tcg_cpus_mttcg);
     } else if (icount_enabled()) {
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.c
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 }
 
 const CpusAccel tcg_cpus_icount = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = rr_start_vcpu_thread,
     .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 
     .handle_interrupt = icount_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-mttcg.c
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
-#include "tcg-cpus-mttcg.h"
 
 /*
  * In the multi-threaded case each vCPU has its own thread. The TLS
@@ -XXX,XX +XXX,XX @@
  * current CPUState for a given thread.
  */
 
-void *tcg_cpu_thread_fn(void *arg)
+static void *tcg_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void mttcg_kick_vcpu_thread(CPUState *cpu)
     cpu_exit(cpu);
 }
 
+static void mttcg_start_vcpu_thread(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+
+    g_assert(tcg_enabled());
+
+    parallel_cpus = (current_machine->smp.max_cpus > 1);
+
+    cpu->thread = g_malloc0(sizeof(QemuThread)); /* own thread per vCPU */
+    cpu->halt_cond = g_malloc0(sizeof(QemuCond)); /* own condvar as well */
+    qemu_cond_init(cpu->halt_cond);
+
+    /*
+     * create a thread per vCPU with TCG (MTTCG): it is named after
+     * cpu->cpu_index and runs tcg_cpu_thread_fn with this cpu as argument
+     */
+    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
+             cpu->cpu_index);
+
+    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
+                       cpu, QEMU_THREAD_JOINABLE);
+
+#ifdef _WIN32
+    cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+}
+
 const CpusAccel tcg_cpus_mttcg = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = mttcg_start_vcpu_thread,
     .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 
     .handle_interrupt = tcg_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.c
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
  * elsewhere.
  */
 
-void *tcg_rr_cpu_thread_fn(void *arg)
+static void *tcg_rr_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ void *tcg_rr_cpu_thread_fn(void *arg)
     return NULL;
 }
 
+void rr_start_vcpu_thread(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+    static QemuCond *single_tcg_halt_cond; /* saved by the first call */
+    static QemuThread *single_tcg_cpu_thread; /* NULL until the first call */
+
+    g_assert(tcg_enabled());
+    parallel_cpus = false;
+
+    if (!single_tcg_cpu_thread) {
+        cpu->thread = g_malloc0(sizeof(QemuThread));
+        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+        qemu_cond_init(cpu->halt_cond);
+
+        /*
+         * share a single thread for all cpus with TCG: only this first
+         * call creates it, later calls take the else branch below
+         */
+        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
+        qemu_thread_create(cpu->thread, thread_name,
+                           tcg_rr_cpu_thread_fn,
+                           cpu, QEMU_THREAD_JOINABLE);
+
+        single_tcg_halt_cond = cpu->halt_cond;
+        single_tcg_cpu_thread = cpu->thread;
+#ifdef _WIN32
+        cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+    } else {
+        /*
+         * we share the thread: reuse the saved thread and halt_cond, copy
+         * first_cpu's thread_id, and mark the vCPU as created right away
+         */
+        cpu->thread = single_tcg_cpu_thread;
+        cpu->halt_cond = single_tcg_halt_cond;
+        cpu->thread_id = first_cpu->thread_id;
+        cpu->can_do_io = 1;
+        cpu->created = true;
+    }
+}
+
 const CpusAccel tcg_cpus_rr = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = rr_start_vcpu_thread,
     .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 
     .handle_interrupt = tcg_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
-#include "tcg-cpus-mttcg.h"
-#include "tcg-cpus-rr.h"
 
 /* common functionality among all TCG variants */
 
-void tcg_start_vcpu_thread(CPUState *cpu)
-{
-    char thread_name[VCPU_THREAD_NAME_SIZE];
-    static QemuCond *single_tcg_halt_cond;
-    static QemuThread *single_tcg_cpu_thread;
-    static int tcg_region_inited;
-
-    assert(tcg_enabled());
-    /*
-     * Initialize TCG regions--once. Now is a good time, because:
-     * (1) TCG's init context, prologue and target globals have been set up.
-     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
-     *     -accel flag is processed, so the check doesn't work then).
-     */
-    if (!tcg_region_inited) {
-        tcg_region_inited = 1;
-        tcg_region_init();
-        parallel_cpus = qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1;
-    }
-
-    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
-        cpu->thread = g_malloc0(sizeof(QemuThread));
-        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
-        qemu_cond_init(cpu->halt_cond);
-
-        if (qemu_tcg_mttcg_enabled()) {
-            /* create a thread per vCPU with TCG (MTTCG) */
-            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
-                 cpu->cpu_index);
-
-            qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
-                               cpu, QEMU_THREAD_JOINABLE);
-
-        } else {
-            /* share a single thread for all cpus with TCG */
-            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
-            qemu_thread_create(cpu->thread, thread_name,
-                               tcg_rr_cpu_thread_fn,
-                               cpu, QEMU_THREAD_JOINABLE);
-
-            single_tcg_halt_cond = cpu->halt_cond;
-            single_tcg_cpu_thread = cpu->thread;
-        }
-#ifdef _WIN32
-        cpu->hThread = qemu_thread_get_handle(cpu->thread);
-#endif
-    } else {
-        /* For non-MTTCG cases we share the thread */
-        cpu->thread = single_tcg_cpu_thread;
-        cpu->halt_cond = single_tcg_halt_cond;
-        cpu->thread_id = first_cpu->thread_id;
-        cpu->can_do_io = 1;
-        cpu->created = true;
-    }
-}
-
 void qemu_tcg_destroy_vcpu(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
-- 
2.25.1

From: Claudio Fontana <cfontana@suse.de>

Signed-off-by: Claudio Fontana <cfontana@suse.de>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20201015143217.29337-4-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-cpus-icount.h |  6 +--
 accel/tcg/tcg-cpus-rr.h     |  2 +-
 accel/tcg/tcg-cpus.h        |  6 +--
 accel/tcg/tcg-cpus-icount.c | 24 ++++++------
 accel/tcg/tcg-cpus-mttcg.c  | 10 ++---
 accel/tcg/tcg-cpus-rr.c     | 74 ++++++++++++++++++-------------------
 accel/tcg/tcg-cpus.c        |  6 +--
 7 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.h
+++ b/accel/tcg/tcg-cpus-icount.h
@@ -XXX,XX +XXX,XX @@
 #ifndef TCG_CPUS_ICOUNT_H
 #define TCG_CPUS_ICOUNT_H
 
-void handle_icount_deadline(void);
-void prepare_icount_for_run(CPUState *cpu);
-void process_icount_data(CPUState *cpu);
+void icount_handle_deadline(void);
+void icount_prepare_for_run(CPUState *cpu);
+void icount_process_data(CPUState *cpu);
 
 #endif /* TCG_CPUS_ICOUNT_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.h
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 
 /* Kick all RR vCPUs. */
-void qemu_cpu_kick_rr_cpus(CPUState *unused);
+void rr_kick_vcpu_thread(CPUState *unused);
 
 /* start the round robin vcpu thread */
 void rr_start_vcpu_thread(CPUState *cpu);
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
 extern const CpusAccel tcg_cpus_icount;
 extern const CpusAccel tcg_cpus_rr;
 
-void qemu_tcg_destroy_vcpu(CPUState *cpu);
-int tcg_cpu_exec(CPUState *cpu);
-void tcg_handle_interrupt(CPUState *cpu, int mask);
+void tcg_cpus_destroy(CPUState *cpu);
+int tcg_cpus_exec(CPUState *cpu);
+void tcg_cpus_handle_interrupt(CPUState *cpu, int mask);
 
 #endif /* TCG_CPUS_H */
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.c
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg-cpus-icount.h"
 #include "tcg-cpus-rr.h"
 
-static int64_t tcg_get_icount_limit(void)
+static int64_t icount_get_limit(void)
 {
     int64_t deadline;
 
@@ -XXX,XX +XXX,XX @@ static int64_t tcg_get_icount_limit(void)
     }
 }
 
-static void notify_aio_contexts(void)
+static void icount_notify_aio_contexts(void)
 {
     /* Wake up other AioContexts.  */
     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
     qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 }
 
-void handle_icount_deadline(void)
+void icount_handle_deadline(void)
 {
     assert(qemu_in_vcpu_thread());
     int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                   QEMU_TIMER_ATTR_ALL);
 
     if (deadline == 0) {
-        notify_aio_contexts();
+        icount_notify_aio_contexts();
     }
 }
 
-void prepare_icount_for_run(CPUState *cpu)
+void icount_prepare_for_run(CPUState *cpu)
 {
     int insns_left;
 
     /*
-     * These should always be cleared by process_icount_data after
+     * These should always be cleared by icount_process_data after
      * each vCPU execution. However u16.high can be raised
-     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
+     * asynchronously by cpu_exit/cpu_interrupt/tcg_cpus_handle_interrupt
      */
     g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
     g_assert(cpu->icount_extra == 0);
 
-    cpu->icount_budget = tcg_get_icount_limit();
+    cpu->icount_budget = icount_get_limit();
     insns_left = MIN(0xffff, cpu->icount_budget);
     cpu_neg(cpu)->icount_decr.u16.low = insns_left;
     cpu->icount_extra = cpu->icount_budget - insns_left;
@@ -XXX,XX +XXX,XX @@ void prepare_icount_for_run(CPUState *cpu)
     replay_mutex_lock();
 
     if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
-        notify_aio_contexts();
+        icount_notify_aio_contexts();
     }
 }
 
-void process_icount_data(CPUState *cpu)
+void icount_process_data(CPUState *cpu)
 {
     /* Account for executed instructions */
     icount_update(cpu);
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 {
     int old_mask = cpu->interrupt_request;
 
-    tcg_handle_interrupt(cpu, mask);
+    tcg_cpus_handle_interrupt(cpu, mask);
     if (qemu_cpu_is_self(cpu) &&
         !cpu->can_do_io
         && (mask & ~old_mask) != 0) {
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 
 const CpusAccel tcg_cpus_icount = {
     .create_vcpu_thread = rr_start_vcpu_thread,
-    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+    .kick_vcpu_thread = rr_kick_vcpu_thread,
 
     .handle_interrupt = icount_handle_interrupt,
     .get_virtual_clock = icount_get,
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-mttcg.c
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
  * current CPUState for a given thread.
  */
 
-static void *tcg_cpu_thread_fn(void *arg)
+static void *mttcg_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
         if (cpu_can_run(cpu)) {
             int r;
             qemu_mutex_unlock_iothread();
-            r = tcg_cpu_exec(cpu);
+            r = tcg_cpus_exec(cpu);
             qemu_mutex_lock_iothread();
             switch (r) {
             case EXCP_DEBUG:
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
         qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
-    qemu_tcg_destroy_vcpu(cpu);
+    tcg_cpus_destroy(cpu);
     qemu_mutex_unlock_iothread();
     rcu_unregister_thread();
     return NULL;
@@ -XXX,XX +XXX,XX @@ static void mttcg_start_vcpu_thread(CPUState *cpu)
     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
              cpu->cpu_index);
 
-    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
+    qemu_thread_create(cpu->thread, thread_name, mttcg_cpu_thread_fn,
                        cpu, QEMU_THREAD_JOINABLE);
 
 #ifdef _WIN32
@@ -XXX,XX +XXX,XX @@ const CpusAccel tcg_cpus_mttcg = {
     .create_vcpu_thread = mttcg_start_vcpu_thread,
     .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 
-    .handle_interrupt = tcg_handle_interrupt,
+    .handle_interrupt = tcg_cpus_handle_interrupt,
 };
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.c
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg-cpus-icount.h"
 
 /* Kick all RR vCPUs */
-void qemu_cpu_kick_rr_cpus(CPUState *unused)
+void rr_kick_vcpu_thread(CPUState *unused)
 {
     CPUState *cpu;
 
@@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick_rr_cpus(CPUState *unused)
  * idleness is complete.
  */
 
-static QEMUTimer *tcg_kick_vcpu_timer;
-static CPUState *tcg_current_rr_cpu;
+static QEMUTimer *rr_kick_vcpu_timer;
+static CPUState *rr_current_cpu;
 
 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 
-static inline int64_t qemu_tcg_next_kick(void)
+static inline int64_t rr_next_kick_time(void)
 {
     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 }
 
 /* Kick the currently round-robin scheduled vCPU to next */
-static void qemu_cpu_kick_rr_next_cpu(void)
+static void rr_kick_next_cpu(void)
 {
     CPUState *cpu;
     do {
-        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
+        cpu = qatomic_mb_read(&rr_current_cpu);
         if (cpu) {
             cpu_exit(cpu);
         }
-    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
+    } while (cpu != qatomic_mb_read(&rr_current_cpu));
 }
 
-static void kick_tcg_thread(void *opaque)
+static void rr_kick_thread(void *opaque)
 {
-    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
-    qemu_cpu_kick_rr_next_cpu();
+    timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
+    rr_kick_next_cpu();
 }
 
-static void start_tcg_kick_timer(void)
+static void rr_start_kick_timer(void)
 {
-    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
-        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                           kick_tcg_thread, NULL);
+    if (!rr_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
+        rr_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                           rr_kick_thread, NULL);
     }
-    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
-        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+    if (rr_kick_vcpu_timer && !timer_pending(rr_kick_vcpu_timer)) {
+        timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
     }
 }
 
-static void stop_tcg_kick_timer(void)
+static void rr_stop_kick_timer(void)
 {
-    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
-        timer_del(tcg_kick_vcpu_timer);
+    if (rr_kick_vcpu_timer && timer_pending(rr_kick_vcpu_timer)) {
+        timer_del(rr_kick_vcpu_timer);
     }
 }
 
-static void qemu_tcg_rr_wait_io_event(void)
+static void rr_wait_io_event(void)
 {
     CPUState *cpu;
 
     while (all_cpu_threads_idle()) {
-        stop_tcg_kick_timer();
+        rr_stop_kick_timer();
         qemu_cond_wait_iothread(first_cpu->halt_cond);
     }
 
-    start_tcg_kick_timer();
+    rr_start_kick_timer();
 
     CPU_FOREACH(cpu) {
         qemu_wait_io_event_common(cpu);
@@ -XXX,XX +XXX,XX @@ static void qemu_tcg_rr_wait_io_event(void)
  * Destroy any remaining vCPUs which have been unplugged and have
  * finished running
  */
-static void deal_with_unplugged_cpus(void)
+static void rr_deal_with_unplugged_cpus(void)
 {
     CPUState *cpu;
 
     CPU_FOREACH(cpu) {
         if (cpu->unplug && !cpu_can_run(cpu)) {
-            qemu_tcg_destroy_vcpu(cpu);
+            tcg_cpus_destroy(cpu);
             break;
         }
     }
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
  * elsewhere.
  */
 
-static void *tcg_rr_cpu_thread_fn(void *arg)
+static void *rr_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
         }
     }
 
-    start_tcg_kick_timer();
+    rr_start_kick_timer();
 
     cpu = first_cpu;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
              * Run the timers here.  This is much more efficient than
              * waking up the I/O thread and waiting for completion.
              */
-            handle_icount_deadline();
+            icount_handle_deadline();
         }
 
         replay_mutex_unlock();
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
 
         while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
 
-            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
+            qatomic_mb_set(&rr_current_cpu, cpu);
             current_cpu = cpu;
 
             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
 
                 qemu_mutex_unlock_iothread();
                 if (icount_enabled()) {
-                    prepare_icount_for_run(cpu);
+                    icount_prepare_for_run(cpu);
                 }
-                r = tcg_cpu_exec(cpu);
+                r = tcg_cpus_exec(cpu);
                 if (icount_enabled()) {
-                    process_icount_data(cpu);
+                    icount_process_data(cpu);
                 }
                 qemu_mutex_lock_iothread();
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
         } /* while (cpu && !cpu->exit_request).. */
 
         /* Does not need qatomic_mb_set because a spurious wakeup is okay.  */
-        qatomic_set(&tcg_current_rr_cpu, NULL);
+        qatomic_set(&rr_current_cpu, NULL);
 
         if (cpu && cpu->exit_request) {
             qatomic_mb_set(&cpu->exit_request, 0);
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
             qemu_notify_event();
         }
 
-        qemu_tcg_rr_wait_io_event();
-        deal_with_unplugged_cpus();
+        rr_wait_io_event();
+        rr_deal_with_unplugged_cpus();
     }
 
     rcu_unregister_thread();
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
         /* share a single thread for all cpus with TCG */
         snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
         qemu_thread_create(cpu->thread, thread_name,
-                           tcg_rr_cpu_thread_fn,
+                           rr_cpu_thread_fn,
                            cpu, QEMU_THREAD_JOINABLE);
 
         single_tcg_halt_cond = cpu->halt_cond;
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
 
 const CpusAccel tcg_cpus_rr = {
     .create_vcpu_thread = rr_start_vcpu_thread,
-    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+    .kick_vcpu_thread = rr_kick_vcpu_thread,
 
-    .handle_interrupt = tcg_handle_interrupt,
+    .handle_interrupt = tcg_cpus_handle_interrupt,
 };
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
 
 /* common functionality among all TCG variants */
 
-void qemu_tcg_destroy_vcpu(CPUState *cpu)
+void tcg_cpus_destroy(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
 }
 
-int tcg_cpu_exec(CPUState *cpu)
+int tcg_cpus_exec(CPUState *cpu)
 {
     int ret;
 #ifdef CONFIG_PROFILER
@@ -XXX,XX +XXX,XX @@ int tcg_cpu_exec(CPUState *cpu)
 }
 
 /* mask must never be zero, except for A20 change call */
-void tcg_handle_interrupt(CPUState *cpu, int mask)
+void tcg_cpus_handle_interrupt(CPUState *cpu, int mask)
 {
     g_assert(qemu_mutex_iothread_locked());
 
-- 
2.25.1