[RFC v9 02/32] accel/tcg: split tcg_start_vcpu_thread

Claudio Fontana posted 22 patches 5 years, 2 months ago
Maintainers: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Bastian Koppelmann <kbastian@mail.uni-paderborn.de>, Sarah Harris <S.E.Harris@kent.ac.uk>, Stafford Horne <shorne@gmail.com>, Roman Bolshakov <r.bolshakov@yadro.com>, Paolo Bonzini <pbonzini@redhat.com>, Palmer Dabbelt <palmer@dabbelt.com>, David Hildenbrand <david@redhat.com>, Jason Wang <jasowang@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, "Edgar E. Iglesias" <edgar.iglesias@gmail.com>, Sergio Lopez <slp@redhat.com>, Michael Rolnik <mrolnik@gmail.com>, Sagar Karandikar <sagark@eecs.berkeley.edu>, Chris Wulff <crwulff@gmail.com>, "Hervé Poussineau" <hpoussin@reactos.org>, Michael Walle <michael@walle.cc>, Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>, Cameron Esfahani <dirty@apple.com>, Richard Henderson <richard.henderson@linaro.org>, Guan Xuetao <gxt@mprc.pku.edu.cn>, Marcelo Tosatti <mtosatti@redhat.com>, Wenchao Wang <wenchao.wang@intel.com>, Stefano Stabellini <sstabellini@kernel.org>, Marek Vasut <marex@denx.de>, Eduardo Habkost <ehabkost@redhat.com>, Peter Maydell <peter.maydell@linaro.org>, Cornelia Huck <cohuck@redhat.com>, Colin Xu <colin.xu@intel.com>, Aleksandar Rikalo <aleksandar.rikalo@syrmia.com>, Alistair Francis <Alistair.Francis@wdc.com>, Anthony Perard <anthony.perard@citrix.com>, "Philippe Mathieu-Daudé" <f4bug@amsat.org>, Peter Xu <peterx@redhat.com>, Thomas Huth <thuth@redhat.com>, Aurelien Jarno <aurelien@aurel32.net>, Jiaxun Yang <jiaxun.yang@flygoat.com>, Max Filippov <jcmvbkbc@gmail.com>, Anthony Green <green@moxielogic.com>, Artyom Tarasenko <atar4qemu@gmail.com>, Yoshinori Sato <ysato@users.sourceforge.jp>, Laurent Vivier <lvivier@redhat.com>, Laurent Vivier <laurent@vivier.eu>, Paul Durrant <paul@xen.org>, David Gibson <david@gibson.dropbear.id.au>, Sunil Muthuswamy <sunilmut@microsoft.com>
There is a newer version of this series
[RFC v9 02/32] accel/tcg: split tcg_start_vcpu_thread
Posted by Claudio Fontana 5 years, 2 months ago
after the initial split into 3 tcg variants, we proceed to also
split tcg_start_vcpu_thread.

We actually split it in 2 this time, since the icount variant
just uses the round robin function.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Claudio Fontana <cfontana@suse.de>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-all.c         |  5 ++++
 accel/tcg/tcg-cpus-icount.c |  2 +-
 accel/tcg/tcg-cpus-mttcg.c  | 29 +++++++++++++++++--
 accel/tcg/tcg-cpus-mttcg.h  | 21 --------------
 accel/tcg/tcg-cpus-rr.c     | 39 +++++++++++++++++++++++--
 accel/tcg/tcg-cpus-rr.h     |  3 +-
 accel/tcg/tcg-cpus.c        | 58 -------------------------------------
 accel/tcg/tcg-cpus.h        |  1 -
 8 files changed, 71 insertions(+), 87 deletions(-)
 delete mode 100644 accel/tcg/tcg-cpus-mttcg.h

diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index e42a028043..1ac0b76515 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -105,6 +105,11 @@ static int tcg_init(MachineState *ms)
     tcg_exec_init(s->tb_size * 1024 * 1024);
     mttcg_enabled = s->mttcg_enabled;
 
+    /*
+     * Initialize TCG regions
+     */
+    tcg_region_init();
+
     if (mttcg_enabled) {
         cpus_register_accel(&tcg_cpus_mttcg);
     } else if (icount_enabled()) {
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
index d3af3afb6d..82dbe2cacf 100644
--- a/accel/tcg/tcg-cpus-icount.c
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -138,7 +138,7 @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 }
 
 const CpusAccel tcg_cpus_icount = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = rr_start_vcpu_thread,
     .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 
     .handle_interrupt = icount_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
index dac724fc85..f2b892a380 100644
--- a/accel/tcg/tcg-cpus-mttcg.c
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -33,7 +33,6 @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
-#include "tcg-cpus-mttcg.h"
 
 /*
  * In the multi-threaded case each vCPU has its own thread. The TLS
@@ -41,7 +40,7 @@
  * current CPUState for a given thread.
  */
 
-void *tcg_cpu_thread_fn(void *arg)
+static void *tcg_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -109,8 +108,32 @@ static void mttcg_kick_vcpu_thread(CPUState *cpu)
     cpu_exit(cpu);
 }
 
+static void mttcg_start_vcpu_thread(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+
+    g_assert(tcg_enabled());
+
+    parallel_cpus = (current_machine->smp.max_cpus > 1);
+
+    cpu->thread = g_malloc0(sizeof(QemuThread));
+    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+    qemu_cond_init(cpu->halt_cond);
+
+    /* create a thread per vCPU with TCG (MTTCG) */
+    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
+             cpu->cpu_index);
+
+    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
+                       cpu, QEMU_THREAD_JOINABLE);
+
+#ifdef _WIN32
+    cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+}
+
 const CpusAccel tcg_cpus_mttcg = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = mttcg_start_vcpu_thread,
     .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 
     .handle_interrupt = tcg_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
deleted file mode 100644
index d1bd771f49..0000000000
--- a/accel/tcg/tcg-cpus-mttcg.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * QEMU TCG Multi Threaded vCPUs implementation
- *
- * Copyright 2020 SUSE LLC
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef TCG_CPUS_MTTCG_H
-#define TCG_CPUS_MTTCG_H
-
-/*
- * In the multi-threaded case each vCPU has its own thread. The TLS
- * variable current_cpu can be used deep in the code to find the
- * current CPUState for a given thread.
- */
-
-void *tcg_cpu_thread_fn(void *arg);
-
-#endif /* TCG_CPUS_MTTCG_H */
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
index ad50a3765f..f3b262bec7 100644
--- a/accel/tcg/tcg-cpus-rr.c
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -144,7 +144,7 @@ static void deal_with_unplugged_cpus(void)
  * elsewhere.
  */
 
-void *tcg_rr_cpu_thread_fn(void *arg)
+static void *tcg_rr_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -262,8 +262,43 @@ void *tcg_rr_cpu_thread_fn(void *arg)
     return NULL;
 }
 
+void rr_start_vcpu_thread(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+    static QemuCond *single_tcg_halt_cond;
+    static QemuThread *single_tcg_cpu_thread;
+
+    g_assert(tcg_enabled());
+    parallel_cpus = false;
+
+    if (!single_tcg_cpu_thread) {
+        cpu->thread = g_malloc0(sizeof(QemuThread));
+        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+        qemu_cond_init(cpu->halt_cond);
+
+        /* share a single thread for all cpus with TCG */
+        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
+        qemu_thread_create(cpu->thread, thread_name,
+                           tcg_rr_cpu_thread_fn,
+                           cpu, QEMU_THREAD_JOINABLE);
+
+        single_tcg_halt_cond = cpu->halt_cond;
+        single_tcg_cpu_thread = cpu->thread;
+#ifdef _WIN32
+        cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+    } else {
+        /* we share the thread */
+        cpu->thread = single_tcg_cpu_thread;
+        cpu->halt_cond = single_tcg_halt_cond;
+        cpu->thread_id = first_cpu->thread_id;
+        cpu->can_do_io = 1;
+        cpu->created = true;
+    }
+}
+
 const CpusAccel tcg_cpus_rr = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = rr_start_vcpu_thread,
     .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 
     .handle_interrupt = tcg_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
index 1936fd16ab..2e5943eda9 100644
--- a/accel/tcg/tcg-cpus-rr.h
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -15,6 +15,7 @@
 /* Kick all RR vCPUs. */
 void qemu_cpu_kick_rr_cpus(CPUState *unused);
 
-void *tcg_rr_cpu_thread_fn(void *arg);
+/* start the round robin vcpu thread */
+void rr_start_vcpu_thread(CPUState *cpu);
 
 #endif /* TCG_CPUS_RR_H */
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index f2b9bbf99e..86fd09545a 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -35,67 +35,9 @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
-#include "tcg-cpus-mttcg.h"
-#include "tcg-cpus-rr.h"
 
 /* common functionality among all TCG variants */
 
-void tcg_start_vcpu_thread(CPUState *cpu)
-{
-    char thread_name[VCPU_THREAD_NAME_SIZE];
-    static QemuCond *single_tcg_halt_cond;
-    static QemuThread *single_tcg_cpu_thread;
-    static int tcg_region_inited;
-
-    assert(tcg_enabled());
-    /*
-     * Initialize TCG regions--once. Now is a good time, because:
-     * (1) TCG's init context, prologue and target globals have been set up.
-     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
-     *     -accel flag is processed, so the check doesn't work then).
-     */
-    if (!tcg_region_inited) {
-        tcg_region_inited = 1;
-        tcg_region_init();
-        parallel_cpus = qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1;
-    }
-
-    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
-        cpu->thread = g_malloc0(sizeof(QemuThread));
-        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
-        qemu_cond_init(cpu->halt_cond);
-
-        if (qemu_tcg_mttcg_enabled()) {
-            /* create a thread per vCPU with TCG (MTTCG) */
-            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
-                 cpu->cpu_index);
-
-            qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
-                               cpu, QEMU_THREAD_JOINABLE);
-
-        } else {
-            /* share a single thread for all cpus with TCG */
-            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
-            qemu_thread_create(cpu->thread, thread_name,
-                               tcg_rr_cpu_thread_fn,
-                               cpu, QEMU_THREAD_JOINABLE);
-
-            single_tcg_halt_cond = cpu->halt_cond;
-            single_tcg_cpu_thread = cpu->thread;
-        }
-#ifdef _WIN32
-        cpu->hThread = qemu_thread_get_handle(cpu->thread);
-#endif
-    } else {
-        /* For non-MTTCG cases we share the thread */
-        cpu->thread = single_tcg_cpu_thread;
-        cpu->halt_cond = single_tcg_halt_cond;
-        cpu->thread_id = first_cpu->thread_id;
-        cpu->can_do_io = 1;
-        cpu->created = true;
-    }
-}
-
 void qemu_tcg_destroy_vcpu(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index 279ba72e1f..b7ca954e13 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -18,7 +18,6 @@ extern const CpusAccel tcg_cpus_mttcg;
 extern const CpusAccel tcg_cpus_icount;
 extern const CpusAccel tcg_cpus_rr;
 
-void tcg_start_vcpu_thread(CPUState *cpu);
 void qemu_tcg_destroy_vcpu(CPUState *cpu);
 int tcg_cpu_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
-- 
2.26.2


Re: [RFC v9 02/32] accel/tcg: split tcg_start_vcpu_thread
Posted by Alex Bennée 5 years, 2 months ago
Claudio Fontana <cfontana@suse.de> writes:

> after the initial split into 3 tcg variants, we proceed to also
> split tcg_start_vcpu_thread.
>
> We actually split it in 2 this time, since the icount variant
> just uses the round robin function.
>
> Suggested-by: Richard Henderson <richard.henderson@linaro.org>
> Signed-off-by: Claudio Fontana <cfontana@suse.de>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  accel/tcg/tcg-all.c         |  5 ++++
>  accel/tcg/tcg-cpus-icount.c |  2 +-
>  accel/tcg/tcg-cpus-mttcg.c  | 29 +++++++++++++++++--
>  accel/tcg/tcg-cpus-mttcg.h  | 21 --------------
>  accel/tcg/tcg-cpus-rr.c     | 39 +++++++++++++++++++++++--
>  accel/tcg/tcg-cpus-rr.h     |  3 +-
>  accel/tcg/tcg-cpus.c        | 58 -------------------------------------
>  accel/tcg/tcg-cpus.h        |  1 -
>  8 files changed, 71 insertions(+), 87 deletions(-)
>  delete mode 100644 accel/tcg/tcg-cpus-mttcg.h
>
> diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
> index e42a028043..1ac0b76515 100644
> --- a/accel/tcg/tcg-all.c
> +++ b/accel/tcg/tcg-all.c
> @@ -105,6 +105,11 @@ static int tcg_init(MachineState *ms)
>      tcg_exec_init(s->tb_size * 1024 * 1024);
>      mttcg_enabled = s->mttcg_enabled;
>  
> +    /*
> +     * Initialize TCG regions
> +     */
> +    tcg_region_init();
> +

It might be worth noting in the commit log this is OK because the
accelerator is now guaranteed to be initialised so you know the details
of MTTCG when initialising the regions.

>      if (mttcg_enabled) {
>          cpus_register_accel(&tcg_cpus_mttcg);
>      } else if (icount_enabled()) {
> diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
> index d3af3afb6d..82dbe2cacf 100644
<snip>
> diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
> deleted file mode 100644
> index d1bd771f49..0000000000
> --- a/accel/tcg/tcg-cpus-mttcg.h
> +++ /dev/null
> @@ -1,21 +0,0 @@
> -/*
> - * QEMU TCG Multi Threaded vCPUs implementation
> - *
> - * Copyright 2020 SUSE LLC
> - *
> - * This work is licensed under the terms of the GNU GPL, version 2 or later.
> - * See the COPYING file in the top-level directory.
> - */
> -
> -#ifndef TCG_CPUS_MTTCG_H
> -#define TCG_CPUS_MTTCG_H
> -
> -/*
> - * In the multi-threaded case each vCPU has its own thread. The TLS
> - * variable current_cpu can be used deep in the code to find the
> - * current CPUState for a given thread.
> - */

Ahh gone now ;-)

> -
> -void *tcg_cpu_thread_fn(void *arg);
> -
> -#endif /* TCG_CPUS_MTTCG_H */
> diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
> index ad50a3765f..f3b262bec7 100644
> --- a/accel/tcg/tcg-cpus-rr.c
> +++ b/accel/tcg/tcg-cpus-rr.c
> @@ -144,7 +144,7 @@ static void deal_with_unplugged_cpus(void)
>   * elsewhere.
>   */
>  
> -void *tcg_rr_cpu_thread_fn(void *arg)
> +static void *tcg_rr_cpu_thread_fn(void *arg)
>  {
>      CPUState *cpu = arg;
>  
> @@ -262,8 +262,43 @@ void *tcg_rr_cpu_thread_fn(void *arg)
>      return NULL;
>  }
>  
> +void rr_start_vcpu_thread(CPUState *cpu)
> +{
> +    char thread_name[VCPU_THREAD_NAME_SIZE];
> +    static QemuCond *single_tcg_halt_cond;
> +    static QemuThread *single_tcg_cpu_thread;
> +
> +    g_assert(tcg_enabled());
> +    parallel_cpus = false;
> +
> +    if (!single_tcg_cpu_thread) {
> +        cpu->thread = g_malloc0(sizeof(QemuThread));
> +        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
> +        qemu_cond_init(cpu->halt_cond);
> +
> +        /* share a single thread for all cpus with TCG */
> +        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
> +        qemu_thread_create(cpu->thread, thread_name,
> +                           tcg_rr_cpu_thread_fn,
> +                           cpu, QEMU_THREAD_JOINABLE);
> +
> +        single_tcg_halt_cond = cpu->halt_cond;
> +        single_tcg_cpu_thread = cpu->thread;
> +#ifdef _WIN32
> +        cpu->hThread = qemu_thread_get_handle(cpu->thread);
> +#endif
> +    } else {
> +        /* we share the thread */
> +        cpu->thread = single_tcg_cpu_thread;
> +        cpu->halt_cond = single_tcg_halt_cond;
> +        cpu->thread_id = first_cpu->thread_id;
> +        cpu->can_do_io = 1;
> +        cpu->created = true;

I was wonder if we should be duplicating hThread here but on closer
examination it looks like it's only used by HAX/WHPX accelerators so
maybe this is a candidate for accelerator specific CPU data?

Otherwise LGTM:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

-- 
Alex Bennée