[v1] CPU, Rust, x86 changes for 2025-09-13

[PULL 17/61] cpus: clear exit_request in qemu_process_cpu_events
Posted by Paolo Bonzini 4 months, 4 weeks ago
Make the code common to all accelerators: after seeing cpu->exit_request
set to true, accelerator code needs to reach qemu_process_cpu_events_common().

So for the common cases where they use qemu_process_cpu_events(), go ahead and
clear it in there.  Note that the cheap qatomic_set() is enough because
at this point the thread has taken the BQL; qatomic_set_mb() is not needed.
In particular, this is the ordering of the communication between
I/O and vCPU threads is always the same.

In the I/O thread:

(a) store other memory locations that will be checked if cpu->exit_request
    or cpu->interrupt_request is 1 (for example cpu->stop or cpu->work_list
    for cpu->exit_request)

(b) cpu_exit(): store-release cpu->exit_request, or
(b) cpu_interrupt(): store-release cpu->interrupt_request

>>> at this point, cpu->halt_cond is broadcast and the BQL released

(c) do the accelerator-specific kick (e.g. write icount_decr for TCG,
    pthread_kill for KVM, etc.)

In the vCPU thread instead the opposite order is respected:

(c) the accelerator's execution loop exits thanks to the kick

(b) then the inner execution loop checks cpu->interrupt_request
    and cpu->exit_request.  If needed cpu->interrupt_request is
    converted into cpu->exit_request when work is needed outside
    the execution loop.

(a) then the other memory locations are checked.  Some may need to
    be read under the BQL, but the vCPU thread may also take other
    locks (e.g. for queued work items) or none at all.

qatomic_set_mb() would only be needed if the halt sleep was done
outside the BQL (though in that case, cpu->exit_request probably
would be replaced by a QemuEvent or something like that).

Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 accel/kvm/kvm-all.c          | 2 --
 accel/tcg/cpu-exec.c         | 1 -
 accel/tcg/tcg-accel-ops-rr.c | 9 +++++++--
 accel/tcg/tcg-accel-ops.c    | 2 --
 accel/tcg/user-exec.c        | 1 +
 system/cpus.c                | 1 +
 target/i386/nvmm/nvmm-all.c  | 2 --
 target/i386/whpx/whpx-all.c  | 2 --
 8 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index e4167d94b4f..d13156bee87 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3155,7 +3155,6 @@ int kvm_cpu_exec(CPUState *cpu)
     trace_kvm_cpu_exec();
 
     if (kvm_arch_process_async_events(cpu)) {
-        qatomic_set(&cpu->exit_request, 0);
         return EXCP_HLT;
     }
 
@@ -3345,7 +3344,6 @@ int kvm_cpu_exec(CPUState *cpu)
         vm_stop(RUN_STATE_INTERNAL_ERROR);
     }
 
-    qatomic_set(&cpu->exit_request, 0);
     return ret;
 }
 
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 3ae545e888f..ad94f96b252 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -872,7 +872,6 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
      * The corresponding store-release is in cpu_exit.
      */
     if (unlikely(qatomic_load_acquire(&cpu->exit_request)) || icount_exit_request(cpu)) {
-        qatomic_set(&cpu->exit_request, 0);
         if (cpu->exception_index == -1) {
             cpu->exception_index = EXCP_INTERRUPT;
         }
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 813b313859a..7dbdba7b514 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -293,8 +293,13 @@ static void *rr_cpu_thread_fn(void *arg)
         /* Does not need a memory barrier because a spurious wakeup is okay.  */
         qatomic_set(&rr_current_cpu, NULL);
 
-        if (cpu && qatomic_read(&cpu->exit_request)) {
-            qatomic_set_mb(&cpu->exit_request, 0);
+        if (cpu) {
+            /*
+             * This could even reset exit_request for all CPUs, but in practice
+             * races between CPU exits and changes to "cpu" are so rare that
+             * there's no advantage in doing so.
+             */
+            qatomic_set(&cpu->exit_request, false);
         }
 
         if (icount_enabled() && all_cpu_threads_idle()) {
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 1f662a9c745..3bd98005042 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -82,8 +82,6 @@ int tcg_cpu_exec(CPUState *cpu)
     ret = cpu_exec(cpu);
     cpu_exec_end(cpu);
 
-    qatomic_set_mb(&cpu->exit_request, 0);
-
     return ret;
 }
 
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 65f5da6c506..916f18754f6 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -54,6 +54,7 @@ void qemu_cpu_kick(CPUState *cpu)
 
 void qemu_process_cpu_events(CPUState *cpu)
 {
+    qatomic_set(&cpu->exit_request, false);
     process_queued_cpu_work(cpu);
 }
 
diff --git a/system/cpus.c b/system/cpus.c
index fd804e07327..aa7bfcf56e5 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -463,6 +463,7 @@ void qemu_process_cpu_events(CPUState *cpu)
 {
     bool slept = false;
 
+    qatomic_set(&cpu->exit_request, false);
     while (cpu_thread_is_idle(cpu)) {
         if (!slept) {
             slept = true;
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index 7e36c42fbb4..ed424251673 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -817,8 +817,6 @@ nvmm_vcpu_loop(CPUState *cpu)
     cpu_exec_end(cpu);
     bql_lock();
 
-    qatomic_set(&cpu->exit_request, false);
-
     return ret < 0;
 }
 
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index 00fb7e23100..2a85168ed51 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -2050,8 +2050,6 @@ static int whpx_vcpu_run(CPUState *cpu)
         whpx_last_vcpu_stopping(cpu);
     }
 
-    qatomic_set(&cpu->exit_request, false);
-
     return ret < 0;
 }
 
-- 
2.51.0