[PATCH 17/18] accel: make all calls to qemu_wait_io_event look the same

Posted by Paolo Bonzini 1 week ago
There is no reason for some accelerators to use qemu_wait_io_event_common
(which is separated from qemu_wait_io_event() specifically for round
robin).  They can also check for events directly on the first pass through
the loop, instead of setting cpu->exit_request to true.
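
For reference, the vCPU thread loops now all converge on roughly this shape
(a simplified sketch, not literal code from any one accelerator; the
accel_vcpu_exec() name is a placeholder for the accelerator-specific exec
function such as kvm_cpu_exec() or hvf_vcpu_exec()):

    do {
        /* Process queued work and sleep while the vCPU has nothing to run. */
        qemu_wait_io_event(cpu);

        if (cpu_can_run(cpu)) {
            int r = accel_vcpu_exec(cpu);  /* placeholder, see above */
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
    } while (!cpu->unplug || cpu_can_run(cpu));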

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 accel/dummy-cpus.c                |  2 +-
 accel/hvf/hvf-accel-ops.c         |  2 +-
 accel/kvm/kvm-accel-ops.c         |  3 ++-
 accel/tcg/tcg-accel-ops-mttcg.c   |  7 ++---
 accel/tcg/tcg-accel-ops-rr.c      | 43 ++++++++++++++-----------------
 target/i386/nvmm/nvmm-accel-ops.c |  6 ++---
 target/i386/whpx/whpx-accel-ops.c |  6 ++---
 7 files changed, 30 insertions(+), 39 deletions(-)

diff --git a/accel/dummy-cpus.c b/accel/dummy-cpus.c
index 03cfc0fa01e..1f74c727c42 100644
--- a/accel/dummy-cpus.c
+++ b/accel/dummy-cpus.c
@@ -43,6 +43,7 @@ static void *dummy_cpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     do {
+        qemu_wait_io_event(cpu);
         bql_unlock();
 #ifndef _WIN32
         do {
@@ -57,7 +58,6 @@ static void *dummy_cpu_thread_fn(void *arg)
         qemu_sem_wait(&cpu->sem);
 #endif
         bql_lock();
-        qemu_wait_io_event(cpu);
     } while (!cpu->unplug);
 
     bql_unlock();
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index d488d6afbac..4ba3e40831f 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -192,13 +192,13 @@ static void *hvf_cpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     do {
+        qemu_wait_io_event(cpu);
         if (cpu_can_run(cpu)) {
             r = hvf_vcpu_exec(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
             }
         }
-        qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
     hvf_vcpu_destroy(cpu);
diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c
index b709187c7d7..80f0141a8a6 100644
--- a/accel/kvm/kvm-accel-ops.c
+++ b/accel/kvm/kvm-accel-ops.c
@@ -47,13 +47,14 @@ static void *kvm_vcpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     do {
+        qemu_wait_io_event(cpu);
+
         if (cpu_can_run(cpu)) {
             r = kvm_cpu_exec(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
             }
         }
-        qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
     kvm_destroy_vcpu(cpu);
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 1148ebcaae5..04012900a30 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -84,10 +84,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
     cpu_thread_signal_created(cpu);
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
-    /* process any pending work */
-    qatomic_set(&cpu->exit_request, true);
-
     do {
+        qemu_wait_io_event(cpu);
+
         if (cpu_can_run(cpu)) {
             int r;
             bql_unlock();
@@ -112,8 +111,6 @@ static void *mttcg_cpu_thread_fn(void *arg)
                 break;
             }
         }
-
-        qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
     tcg_cpu_destroy(cpu);
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index e9d291dc391..28897288db7 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -211,13 +211,30 @@ static void *rr_cpu_thread_fn(void *arg)
 
     cpu = first_cpu;
 
-    /* process any pending work */
-    qatomic_set(&cpu->exit_request, true);
-
     while (1) {
         /* Only used for icount_enabled() */
         int64_t cpu_budget = 0;
 
+        if (cpu) {
+            /*
+             * This could even reset exit_request for all CPUs, but in practice
+             * races between CPU exits and changes to "cpu" are so rare that
+             * there's no advantage in doing so.
+             */
+            qatomic_set(&cpu->exit_request, false);
+        }
+
+        if (icount_enabled() && all_cpu_threads_idle()) {
+            /*
+             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
+             * in the main_loop, wake it up in order to start the warp timer.
+             */
+            qemu_notify_event();
+        }
+
+        rr_wait_io_event();
+        rr_deal_with_unplugged_cpus();
+
         bql_unlock();
         replay_mutex_lock();
         bql_lock();
@@ -285,26 +302,6 @@ static void *rr_cpu_thread_fn(void *arg)
 
         /* Does not need a memory barrier because a spurious wakeup is okay.  */
         qatomic_set(&rr_current_cpu, NULL);
-
-        if (cpu) {
-            /*
-             * This could even reset exit_request for all CPUs, but in practice
-             * races between CPU exits and changes to "cpu" are so rare that
-             * there's no advantage in doing so.
-             */
-            qatomic_set(&cpu->exit_request, false);
-        }
-
-        if (icount_enabled() && all_cpu_threads_idle()) {
-            /*
-             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
-             * in the main_loop, wake it up in order to start the warp timer.
-             */
-            qemu_notify_event();
-        }
-
-        rr_wait_io_event();
-        rr_deal_with_unplugged_cpus();
     }
 
     g_assert_not_reached();
diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c
index 86869f133e9..f51244740d8 100644
--- a/target/i386/nvmm/nvmm-accel-ops.c
+++ b/target/i386/nvmm/nvmm-accel-ops.c
@@ -42,16 +42,14 @@ static void *qemu_nvmm_cpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     do {
+        qemu_wait_io_event(cpu);
+
         if (cpu_can_run(cpu)) {
             r = nvmm_vcpu_exec(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
             }
         }
-        while (cpu_thread_is_idle(cpu)) {
-            qemu_cond_wait_bql(cpu->halt_cond);
-        }
-        qemu_wait_io_event_common(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
     nvmm_destroy_vcpu(cpu);
diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c
index da58805b1a6..611eeedeef7 100644
--- a/target/i386/whpx/whpx-accel-ops.c
+++ b/target/i386/whpx/whpx-accel-ops.c
@@ -42,16 +42,14 @@ static void *whpx_cpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     do {
+        qemu_wait_io_event(cpu);
+
         if (cpu_can_run(cpu)) {
             r = whpx_vcpu_exec(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
             }
         }
-        while (cpu_thread_is_idle(cpu)) {
-            qemu_cond_wait_bql(cpu->halt_cond);
-        }
-        qemu_wait_io_event_common(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
     whpx_destroy_vcpu(cpu);
-- 
2.51.0
Re: [PATCH 17/18] accel: make all calls to qemu_wait_io_event look the same
Posted by Igor Mammedov 4 days, 14 hours ago
On Fri, 29 Aug 2025 17:31:14 +0200
Paolo Bonzini <pbonzini@redhat.com> wrote:

> There is no reason for some accelerators to use qemu_wait_io_event_common
> (which is separated from qemu_wait_io_event() specifically for round
> robin).  They can also check for events directly on the first pass through
> the loop, instead of setting cpu->exit_request to true.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Igor Mammedov <imammedo@redhat.com>

Re: [PATCH 17/18] accel: make all calls to qemu_wait_io_event look the same
Posted by Philippe Mathieu-Daudé 4 days, 16 hours ago
On 29/8/25 17:31, Paolo Bonzini wrote:
> There is no reason for some accelerators to use qemu_wait_io_event_common
> (which is separated from qemu_wait_io_event() specifically for round
> robin).  They can also check for events directly on the first pass through
> the loop, instead of setting cpu->exit_request to true.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   accel/dummy-cpus.c                |  2 +-
>   accel/hvf/hvf-accel-ops.c         |  2 +-
>   accel/kvm/kvm-accel-ops.c         |  3 ++-
>   accel/tcg/tcg-accel-ops-mttcg.c   |  7 ++---
>   accel/tcg/tcg-accel-ops-rr.c      | 43 ++++++++++++++-----------------
>   target/i386/nvmm/nvmm-accel-ops.c |  6 ++---
>   target/i386/whpx/whpx-accel-ops.c |  6 ++---
>   7 files changed, 30 insertions(+), 39 deletions(-)

Nice.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>


Re: [PATCH 17/18] accel: make all calls to qemu_wait_io_event look the same
Posted by Richard Henderson 1 week ago
On 8/30/25 01:31, Paolo Bonzini wrote:
> There is no reason for some accelerators to use qemu_wait_io_event_common
> (which is separated from qemu_wait_io_event() specifically for round
> robin).  They can also check for events directly on the first pass through
> the loop, instead of setting cpu->exit_request to true.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   accel/dummy-cpus.c                |  2 +-
>   accel/hvf/hvf-accel-ops.c         |  2 +-
>   accel/kvm/kvm-accel-ops.c         |  3 ++-
>   accel/tcg/tcg-accel-ops-mttcg.c   |  7 ++---
>   accel/tcg/tcg-accel-ops-rr.c      | 43 ++++++++++++++-----------------
>   target/i386/nvmm/nvmm-accel-ops.c |  6 ++---
>   target/i386/whpx/whpx-accel-ops.c |  6 ++---
>   7 files changed, 30 insertions(+), 39 deletions(-)


Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


To-do for myself: It appears as if we can reduce the number of checks for cpu == NULL in 
the rr loop by moving the cpu=first_cpu assignment to the right place.
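
Roughly something like this, as an untested sketch of the idea only (not the
actual rr_cpu_thread_fn structure):

    cpu = NULL;

    while (1) {
        if (!cpu) {
            cpu = first_cpu;
        }
        /* "cpu" can be dereferenced below without further NULL checks. */
        qatomic_set(&cpu->exit_request, false);

        /* ... rest of the loop body ... */
    }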


r~
Re: [PATCH 17/18] accel: make all calls to qemu_wait_io_event look the same
Posted by Philippe Mathieu-Daudé 4 days, 16 hours ago
On 30/8/25 00:16, Richard Henderson wrote:
> On 8/30/25 01:31, Paolo Bonzini wrote:
>> There is no reason for some accelerators to use qemu_wait_io_event_common
>> (which is separated from qemu_wait_io_event() specifically for round
>> robin).  They can also check for events directly on the first pass 
>> through
>> the loop, instead of setting cpu->exit_request to true.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>   accel/dummy-cpus.c                |  2 +-
>>   accel/hvf/hvf-accel-ops.c         |  2 +-
>>   accel/kvm/kvm-accel-ops.c         |  3 ++-
>>   accel/tcg/tcg-accel-ops-mttcg.c   |  7 ++---
>>   accel/tcg/tcg-accel-ops-rr.c      | 43 ++++++++++++++-----------------
>>   target/i386/nvmm/nvmm-accel-ops.c |  6 ++---
>>   target/i386/whpx/whpx-accel-ops.c |  6 ++---
>>   7 files changed, 30 insertions(+), 39 deletions(-)
> 
> 
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> 
> 
> To-do for myself: It appears as if we can reduce the number of checks 
> for cpu == NULL in the rr loop by moving the cpu=first_cpu assignment to 
> the right place.

This was my intent here:
https://lore.kernel.org/qemu-devel/20250128142152.9889-2-philmd@linaro.org/