On Fri, 29 Aug 2025 17:31:13 +0200
Paolo Bonzini <pbonzini@redhat.com> wrote:
> Make the code common to all accelerators: after seeing cpu->exit_request
> set to true, accelerator code needs to reach qemu_wait_io_event_common().
>
> So for the common case where accelerators use qemu_wait_io_event(), go
> ahead and clear it there. Note that the cheap qatomic_set() is enough because
> at this point the thread has taken the BQL; qatomic_set_mb() is not needed.
> In particular, the ordering of the communication between the I/O and
> vCPU threads is always the same (a sketch in plain C11 atomics follows
> the two lists below).
>
> In the I/O thread:
>
> (a) store to the other memory locations that the vCPU thread will check
>     once it sees cpu->exit_request or cpu->interrupt_request set (for
>     example cpu->stop or cpu->work_list in the case of cpu->exit_request)
>
> (b) cpu_exit(): store-release cpu->exit_request, or
> (b) cpu_interrupt(): store-release cpu->interrupt_request
>
> >>> at this point, cpu->halt_cond is broadcast and the BQL released
>
> (c) do the accelerator-specific kick (e.g. write icount_decr for TCG,
>     pthread_kill for KVM, etc.)
>
> In the vCPU thread instead the opposite order is respected:
>
> (c) the accelerator's execution loop exits thanks to the kick
>
> (b) then the inner execution loop checks cpu->interrupt_request
>     and cpu->exit_request; cpu->interrupt_request is converted
>     into cpu->exit_request when work is needed outside the
>     execution loop.
>
> (a) then the other memory locations are checked. Some may need to
>     be read under the BQL, but the vCPU thread may also take other
>     locks (e.g. for queued work items) or none at all.
>
> qatomic_set_mb() would only be needed if the halt sleep were done
> outside the BQL (though in that case, cpu->exit_request probably
> would be replaced by a QemuEvent or something like that).
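>
> A minimal sketch of this pairing, written with plain C11 atomics
> instead of QEMU's qatomic_*() helpers (the toy_cpu names are
> illustrative only, not actual QEMU code):
>
>     #include <stdatomic.h>
>     #include <stdbool.h>
>
>     struct toy_cpu {
>         atomic_bool exit_request;
>         bool stop;              /* (a): payload published before (b) */
>     };
>
>     /* I/O thread: (a) store payload, (b) store-release flag, (c) kick. */
>     void toy_cpu_exit(struct toy_cpu *cpu)
>     {
>         cpu->stop = true;                                    /* (a) */
>         atomic_store_explicit(&cpu->exit_request, true,
>                               memory_order_release);         /* (b) */
>         /* (c) the accelerator-specific kick would go here */
>     }
>
>     /* vCPU thread: after the kick breaks the loop (c), a load-acquire
>      * of the flag (b) makes the payload stores (a) visible. */
>     bool toy_cpu_should_stop(struct toy_cpu *cpu)
>     {
>         if (atomic_load_explicit(&cpu->exit_request,
>                                  memory_order_acquire)) {    /* (b) */
>             return cpu->stop;                                /* (a) */
>         }
>         return false;
>     }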
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
> ---
>  accel/kvm/kvm-all.c          | 2 --
>  accel/tcg/cpu-exec.c         | 1 -
>  accel/tcg/tcg-accel-ops-rr.c | 9 +++++++--
>  accel/tcg/tcg-accel-ops.c    | 2 --
>  accel/tcg/user-exec.c        | 1 +
>  system/cpus.c                | 1 +
>  target/i386/nvmm/nvmm-all.c  | 2 --
>  target/i386/whpx/whpx-all.c  | 2 --
>  8 files changed, 9 insertions(+), 11 deletions(-)
>
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index e4167d94b4f..d13156bee87 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -3155,7 +3155,6 @@ int kvm_cpu_exec(CPUState *cpu)
>      trace_kvm_cpu_exec();
>  
>      if (kvm_arch_process_async_events(cpu)) {
> -        qatomic_set(&cpu->exit_request, 0);
>          return EXCP_HLT;
>      }
>
> @@ -3345,7 +3344,6 @@ int kvm_cpu_exec(CPUState *cpu)
>          vm_stop(RUN_STATE_INTERNAL_ERROR);
>      }
>  
> -    qatomic_set(&cpu->exit_request, 0);
>      return ret;
>  }
>
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 3ae545e888f..ad94f96b252 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -872,7 +872,6 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
>       * The corresponding store-release is in cpu_exit.
>       */
>      if (unlikely(qatomic_load_acquire(&cpu->exit_request)) || icount_exit_request(cpu)) {
> -        qatomic_set(&cpu->exit_request, 0);
>          if (cpu->exception_index == -1) {
>              cpu->exception_index = EXCP_INTERRUPT;
>          }
> diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
> index 610292d3bac..e9d291dc391 100644
> --- a/accel/tcg/tcg-accel-ops-rr.c
> +++ b/accel/tcg/tcg-accel-ops-rr.c
> @@ -286,8 +286,13 @@ static void *rr_cpu_thread_fn(void *arg)
>          /* Does not need a memory barrier because a spurious wakeup is okay. */
>          qatomic_set(&rr_current_cpu, NULL);
>  
> -        if (cpu && qatomic_read(&cpu->exit_request)) {
> -            qatomic_set_mb(&cpu->exit_request, 0);
> +        if (cpu) {
> +            /*
> +             * This could even reset exit_request for all CPUs, but in practice
> +             * races between CPU exits and changes to "cpu" are so rare that
> +             * there's no advantage in doing so.
> +             */
> +            qatomic_set(&cpu->exit_request, false);
>          }
>  
>          if (icount_enabled() && all_cpu_threads_idle()) {
> diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
> index 1f662a9c745..3bd98005042 100644
> --- a/accel/tcg/tcg-accel-ops.c
> +++ b/accel/tcg/tcg-accel-ops.c
> @@ -82,8 +82,6 @@ int tcg_cpu_exec(CPUState *cpu)
>      ret = cpu_exec(cpu);
>      cpu_exec_end(cpu);
>  
> -    qatomic_set_mb(&cpu->exit_request, 0);
> -
>      return ret;
>  }
>
> diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
> index 81906d2e033..8f4f049b924 100644
> --- a/accel/tcg/user-exec.c
> +++ b/accel/tcg/user-exec.c
> @@ -54,6 +54,7 @@ void qemu_cpu_kick(CPUState *cpu)
>
>  void qemu_wait_io_event(CPUState *cpu)
>  {
> +    qatomic_set(&cpu->exit_request, false);
>      process_queued_cpu_work(cpu);
>  }
>
> diff --git a/system/cpus.c b/system/cpus.c
> index bb13942cbb7..f989d9938b6 100644
> --- a/system/cpus.c
> +++ b/system/cpus.c
> @@ -463,6 +463,7 @@ void qemu_wait_io_event(CPUState *cpu)
>  {
>      bool slept = false;
>  
> +    qatomic_set(&cpu->exit_request, false);
>      while (cpu_thread_is_idle(cpu)) {
>          if (!slept) {
>              slept = true;
> diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
> index 7e36c42fbb4..ed424251673 100644
> --- a/target/i386/nvmm/nvmm-all.c
> +++ b/target/i386/nvmm/nvmm-all.c
> @@ -817,8 +817,6 @@ nvmm_vcpu_loop(CPUState *cpu)
>      cpu_exec_end(cpu);
>      bql_lock();
>  
> -    qatomic_set(&cpu->exit_request, false);
> -
>      return ret < 0;
>  }
>
> diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
> index 00fb7e23100..2a85168ed51 100644
> --- a/target/i386/whpx/whpx-all.c
> +++ b/target/i386/whpx/whpx-all.c
> @@ -2050,8 +2050,6 @@ static int whpx_vcpu_run(CPUState *cpu)
>          whpx_last_vcpu_stopping(cpu);
>      }
>  
> -    qatomic_set(&cpu->exit_request, false);
> -
>      return ret < 0;
>  }
>