[PATCH 17/18] accel: make all calls to qemu_wait_io_event look the same

Posted by Paolo Bonzini 1 week ago
There is no reason for some accelerators to use qemu_wait_io_event_common
(which is separated from qemu_wait_io_event() specifically for round
robin).  They can also check for events directly on the first pass through
the loop, instead of setting cpu->exit_request to true.
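
For reference, the vCPU thread loops now all converge on roughly this shape
(a simplified sketch, not literal code from any one accelerator; the
accel_vcpu_exec() name is a placeholder for the accelerator-specific exec
function such as kvm_cpu_exec() or hvf_vcpu_exec()):

    do {
        /* Process queued work and sleep while the vCPU has nothing to run. */
        qemu_wait_io_event(cpu);

        if (cpu_can_run(cpu)) {
            int r = accel_vcpu_exec(cpu);  /* placeholder, see above */
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
    } while (!cpu->unplug || cpu_can_run(cpu));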

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 accel/dummy-cpus.c                |  2 +-
 accel/hvf/hvf-accel-ops.c         |  2 +-
 accel/kvm/kvm-accel-ops.c         |  3 ++-
 accel/tcg/tcg-accel-ops-mttcg.c   |  7 ++---
 accel/tcg/tcg-accel-ops-rr.c      | 43 ++++++++++++++-----------------
 target/i386/nvmm/nvmm-accel-ops.c |  6 ++---
 target/i386/whpx/whpx-accel-ops.c |  6 ++---
 7 files changed, 30 insertions(+), 39 deletions(-)

diff --git a/accel/dummy-cpus.c b/accel/dummy-cpus.c
index 03cfc0fa01e..1f74c727c42 100644
--- a/accel/dummy-cpus.c
+++ b/accel/dummy-cpus.c
@@ -43,6 +43,7 @@ static void *dummy_cpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     do {
+        qemu_wait_io_event(cpu);
         bql_unlock();
 #ifndef _WIN32
         do {
@@ -57,7 +58,6 @@ static void *dummy_cpu_thread_fn(void *arg)
         qemu_sem_wait(&cpu->sem);
 #endif
         bql_lock();
-        qemu_wait_io_event(cpu);
     } while (!cpu->unplug);
 
     bql_unlock();
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index d488d6afbac..4ba3e40831f 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -192,13 +192,13 @@ static void *hvf_cpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     do {
+        qemu_wait_io_event(cpu);
         if (cpu_can_run(cpu)) {
             r = hvf_vcpu_exec(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
             }
         }
-        qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
     hvf_vcpu_destroy(cpu);
diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c
index b709187c7d7..80f0141a8a6 100644
--- a/accel/kvm/kvm-accel-ops.c
+++ b/accel/kvm/kvm-accel-ops.c
@@ -47,13 +47,14 @@ static void *kvm_vcpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     do {
+        qemu_wait_io_event(cpu);
+
         if (cpu_can_run(cpu)) {
             r = kvm_cpu_exec(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
             }
         }
-        qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
     kvm_destroy_vcpu(cpu);
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 1148ebcaae5..04012900a30 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -84,10 +84,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
     cpu_thread_signal_created(cpu);
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
-    /* process any pending work */
-    qatomic_set(&cpu->exit_request, true);
-
     do {
+        qemu_wait_io_event(cpu);
+
         if (cpu_can_run(cpu)) {
             int r;
             bql_unlock();
@@ -112,8 +111,6 @@ static void *mttcg_cpu_thread_fn(void *arg)
                 break;
             }
         }
-
-        qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
     tcg_cpu_destroy(cpu);
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index e9d291dc391..28897288db7 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -211,13 +211,30 @@ static void *rr_cpu_thread_fn(void *arg)
 
     cpu = first_cpu;
 
-    /* process any pending work */
-    qatomic_set(&cpu->exit_request, true);
-
     while (1) {
         /* Only used for icount_enabled() */
         int64_t cpu_budget = 0;
 
+        if (cpu) {
+            /*
+             * This could even reset exit_request for all CPUs, but in practice
+             * races between CPU exits and changes to "cpu" are so rare that
+             * there's no advantage in doing so.
+             */
+            qatomic_set(&cpu->exit_request, false);
+        }
+
+        if (icount_enabled() && all_cpu_threads_idle()) {
+            /*
+             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
+             * in the main_loop, wake it up in order to start the warp timer.
+             */
+            qemu_notify_event();
+        }
+
+        rr_wait_io_event();
+        rr_deal_with_unplugged_cpus();
+
         bql_unlock();
         replay_mutex_lock();
         bql_lock();
@@ -285,26 +302,6 @@ static void *rr_cpu_thread_fn(void *arg)
 
         /* Does not need a memory barrier because a spurious wakeup is okay.  */
         qatomic_set(&rr_current_cpu, NULL);
-
-        if (cpu) {
-            /*
-             * This could even reset exit_request for all CPUs, but in practice
-             * races between CPU exits and changes to "cpu" are so rare that
-             * there's no advantage in doing so.
-             */
-            qatomic_set(&cpu->exit_request, false);
-        }
-
-        if (icount_enabled() && all_cpu_threads_idle()) {
-            /*
-             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
-             * in the main_loop, wake it up in order to start the warp timer.
-             */
-            qemu_notify_event();
-        }
-
-        rr_wait_io_event();
-        rr_deal_with_unplugged_cpus();
     }
 
     g_assert_not_reached();
diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c
index 86869f133e9..f51244740d8 100644
--- a/target/i386/nvmm/nvmm-accel-ops.c
+++ b/target/i386/nvmm/nvmm-accel-ops.c
@@ -42,16 +42,14 @@ static void *qemu_nvmm_cpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     do {
+        qemu_wait_io_event(cpu);
+
         if (cpu_can_run(cpu)) {
             r = nvmm_vcpu_exec(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
             }
         }
-        while (cpu_thread_is_idle(cpu)) {
-            qemu_cond_wait_bql(cpu->halt_cond);
-        }
-        qemu_wait_io_event_common(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
     nvmm_destroy_vcpu(cpu);
diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c
index da58805b1a6..611eeedeef7 100644
--- a/target/i386/whpx/whpx-accel-ops.c
+++ b/target/i386/whpx/whpx-accel-ops.c
@@ -42,16 +42,14 @@ static void *whpx_cpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     do {
+        qemu_wait_io_event(cpu);
+
         if (cpu_can_run(cpu)) {
             r = whpx_vcpu_exec(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
             }
         }
-        while (cpu_thread_is_idle(cpu)) {
-            qemu_cond_wait_bql(cpu->halt_cond);
-        }
-        qemu_wait_io_event_common(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
     whpx_destroy_vcpu(cpu);
-- 
2.51.0
Re: [PATCH 17/18] accel: make all calls to qemu_wait_io_event look the same
Posted by Igor Mammedov 4 days, 14 hours ago
On Fri, 29 Aug 2025 17:31:14 +0200
Paolo Bonzini <pbonzini@redhat.com> wrote:

> There is no reason for some accelerators to use qemu_wait_io_event_common
> (which is separated from qemu_wait_io_event() specifically for round
> robin).  They can also check for events directly on the first pass through
> the loop, instead of setting cpu->exit_request to true.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Igor Mammedov <imammedo@redhat.com>

Re: [PATCH 17/18] accel: make all calls to qemu_wait_io_event look the same
Posted by Philippe Mathieu-Daudé 4 days, 16 hours ago
On 29/8/25 17:31, Paolo Bonzini wrote:
> There is no reason for some accelerators to use qemu_wait_io_event_common
> (which is separated from qemu_wait_io_event() specifically for round
> robin).  They can also check for events directly on the first pass through
> the loop, instead of setting cpu->exit_request to true.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   accel/dummy-cpus.c                |  2 +-
>   accel/hvf/hvf-accel-ops.c         |  2 +-
>   accel/kvm/kvm-accel-ops.c         |  3 ++-
>   accel/tcg/tcg-accel-ops-mttcg.c   |  7 ++---
>   accel/tcg/tcg-accel-ops-rr.c      | 43 ++++++++++++++-----------------
>   target/i386/nvmm/nvmm-accel-ops.c |  6 ++---
>   target/i386/whpx/whpx-accel-ops.c |  6 ++---
>   7 files changed, 30 insertions(+), 39 deletions(-)

Nice.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>


Re: [PATCH 17/18] accel: make all calls to qemu_wait_io_event look the same
Posted by Richard Henderson 1 week ago
On 8/30/25 01:31, Paolo Bonzini wrote:
> There is no reason for some accelerators to use qemu_wait_io_event_common
> (which is separated from qemu_wait_io_event() specifically for round
> robin).  They can also check for events directly on the first pass through
> the loop, instead of setting cpu->exit_request to true.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   accel/dummy-cpus.c                |  2 +-
>   accel/hvf/hvf-accel-ops.c         |  2 +-
>   accel/kvm/kvm-accel-ops.c         |  3 ++-
>   accel/tcg/tcg-accel-ops-mttcg.c   |  7 ++---
>   accel/tcg/tcg-accel-ops-rr.c      | 43 ++++++++++++++-----------------
>   target/i386/nvmm/nvmm-accel-ops.c |  6 ++---
>   target/i386/whpx/whpx-accel-ops.c |  6 ++---
>   7 files changed, 30 insertions(+), 39 deletions(-)


Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


To-do for myself: It appears as if we can reduce the number of checks for cpu == NULL in 
the rr loop by moving the cpu=first_cpu assignment to the right place.
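
Roughly something like this, as an untested sketch of the idea only (not the
actual rr_cpu_thread_fn structure):

    cpu = NULL;

    while (1) {
        if (!cpu) {
            cpu = first_cpu;
        }
        /* "cpu" can be dereferenced below without further NULL checks. */
        qatomic_set(&cpu->exit_request, false);

        /* ... rest of the loop body ... */
    }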


r~
Re: [PATCH 17/18] accel: make all calls to qemu_wait_io_event look the same
Posted by Philippe Mathieu-Daudé 4 days, 16 hours ago
On 30/8/25 00:16, Richard Henderson wrote:
> On 8/30/25 01:31, Paolo Bonzini wrote:
>> There is no reason for some accelerators to use qemu_wait_io_event_common
>> (which is separated from qemu_wait_io_event() specifically for round
>> robin).  They can also check for events directly on the first pass 
>> through
>> the loop, instead of setting cpu->exit_request to true.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>   accel/dummy-cpus.c                |  2 +-
>>   accel/hvf/hvf-accel-ops.c         |  2 +-
>>   accel/kvm/kvm-accel-ops.c         |  3 ++-
>>   accel/tcg/tcg-accel-ops-mttcg.c   |  7 ++---
>>   accel/tcg/tcg-accel-ops-rr.c      | 43 ++++++++++++++-----------------
>>   target/i386/nvmm/nvmm-accel-ops.c |  6 ++---
>>   target/i386/whpx/whpx-accel-ops.c |  6 ++---
>>   7 files changed, 30 insertions(+), 39 deletions(-)
> 
> 
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> 
> 
> To-do for myself: It appears as if we can reduce the number of checks 
> for cpu == NULL in the rr loop by moving the cpu=first_cpu assignment to 
> the right place.

This was my intent here:
https://lore.kernel.org/qemu-devel/20250128142152.9889-2-philmd@linaro.org/