[PATCH 10/18] accel: use atomic accesses for exit_request

Posted by Paolo Bonzini 1 week ago
CPU threads write exit_request as a "note to self" that they need to
go out to a slow path.  This write happens outside the BQL and can be
a data race with another thread's cpu_exit(); use atomic accesses
consistently.

While at it, change the value written from int ("1") to bool ("true").

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/hw/core/cpu.h             | 9 +++++++++
 accel/kvm/kvm-all.c               | 2 +-
 accel/tcg/tcg-accel-ops-mttcg.c   | 2 +-
 accel/tcg/tcg-accel-ops-rr.c      | 4 ++--
 hw/ppc/spapr_hcall.c              | 6 +++---
 target/i386/kvm/kvm.c             | 6 +++---
 target/i386/nvmm/nvmm-accel-ops.c | 2 +-
 target/i386/nvmm/nvmm-all.c       | 2 +-
 target/i386/whpx/whpx-all.c       | 6 +++---
 9 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 8b57bcd92c9..338757e5254 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -422,6 +422,15 @@ struct qemu_work_item;
  * valid under cpu_list_lock.
  * @created: Indicates whether the CPU thread has been successfully created.
  * @halt_cond: condition variable sleeping threads can wait on.
+ * @exit_request: Another thread requests the CPU to call qemu_wait_io_event().
+ *   Should be read only by CPU thread with load-acquire, to synchronize with
+ *   other threads' store-release operation.
+ *
+ *   In some cases, accelerator-specific code will write exit_request from
+ *   within the same thread, to "bump" the effect of qemu_cpu_kick() to
+ *   the one provided by cpu_exit(), especially when processing interrupt
+ *   flags.  In this case, the write and read happen in the same thread
+ *   and the write therefore can use qatomic_set().
  * @interrupt_request: Indicates a pending interrupt request.
  *   Only used by system emulation.
  * @halted: Nonzero if the CPU is in suspended state.
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index bd9e5e3886d..e4167d94b4f 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3730,7 +3730,7 @@ int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
     have_sigbus_pending = true;
     pending_sigbus_addr = addr;
     pending_sigbus_code = code;
-    qatomic_set(&cpu->exit_request, 1);
+    qatomic_set(&cpu->exit_request, true);
     return 0;
 #else
     return 1;
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 337b993d3da..b12b7a36b5d 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -85,7 +85,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
     qemu_guest_random_seed_thread_part2(cpu->random_seed);
 
     /* process any pending work */
-    cpu->exit_request = 1;
+    qatomic_set(&cpu->exit_request, true);
 
     do {
         if (cpu_can_run(cpu)) {
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 1e551e92d6d..c2468d15d4f 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -212,7 +212,7 @@ static void *rr_cpu_thread_fn(void *arg)
     cpu = first_cpu;
 
     /* process any pending work */
-    cpu->exit_request = 1;
+    qatomic_set(&cpu->exit_request, true);
 
     while (1) {
         /* Only used for icount_enabled() */
@@ -286,7 +286,7 @@ static void *rr_cpu_thread_fn(void *arg)
         /* Does not need a memory barrier because a spurious wakeup is okay.  */
         qatomic_set(&rr_current_cpu, NULL);
 
-        if (cpu && cpu->exit_request) {
+        if (cpu && qatomic_read(&cpu->exit_request)) {
             qatomic_set_mb(&cpu->exit_request, 0);
         }
 
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 1e936f35e44..51875e32a09 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -509,7 +509,7 @@ static target_ulong h_cede(PowerPCCPU *cpu, SpaprMachineState *spapr,
     if (!cpu_has_work(cs)) {
         cs->halted = 1;
         cs->exception_index = EXCP_HLT;
-        cs->exit_request = 1;
+        qatomic_set(&cs->exit_request, true);
         ppc_maybe_interrupt(env);
     }
 
@@ -531,7 +531,7 @@ static target_ulong h_confer_self(PowerPCCPU *cpu)
     }
     cs->halted = 1;
     cs->exception_index = EXCP_HALTED;
-    cs->exit_request = 1;
+    qatomic_set(&cs->exit_request, true);
     ppc_maybe_interrupt(&cpu->env);
 
     return H_SUCCESS;
@@ -624,7 +624,7 @@ static target_ulong h_confer(PowerPCCPU *cpu, SpaprMachineState *spapr,
     }
 
     cs->exception_index = EXCP_YIELD;
-    cs->exit_request = 1;
+    qatomic_set(&cs->exit_request, true);
     cpu_loop_exit(cs);
 
     return H_SUCCESS;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 8420c4090ef..34e74f24470 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -5486,10 +5486,10 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
     if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
         if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) &&
             !(env->hflags & HF_SMM_MASK)) {
-            qatomic_set(&cpu->exit_request, 1);
+            qatomic_set(&cpu->exit_request, true);
         }
         if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) {
-            qatomic_set(&cpu->exit_request, 1);
+            qatomic_set(&cpu->exit_request, true);
         }
     }
 
@@ -5604,7 +5604,7 @@ int kvm_arch_process_async_events(CPUState *cs)
         if (env->exception_nr == EXCP08_DBLE) {
             /* this means triple fault */
             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
-            cs->exit_request = 1;
+            qatomic_set(&cs->exit_request, true);
             return 0;
         }
         kvm_queue_exception(env, EXCP12_MCHK, 0, 0);
diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c
index 3799260bbde..86869f133e9 100644
--- a/target/i386/nvmm/nvmm-accel-ops.c
+++ b/target/i386/nvmm/nvmm-accel-ops.c
@@ -77,7 +77,7 @@ static void nvmm_start_vcpu_thread(CPUState *cpu)
  */
 static void nvmm_kick_vcpu_thread(CPUState *cpu)
 {
-    cpu->exit_request = 1;
+    qatomic_set(&cpu->exit_request, true);
     cpus_kick_thread(cpu);
 }
 
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index 10bd51d9b59..7e36c42fbb4 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -414,7 +414,7 @@ nvmm_vcpu_pre_run(CPUState *cpu)
      * or commit pending TPR access.
      */
     if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
-        cpu->exit_request = 1;
+        qatomic_set(&cpu->exit_request, true);
     }
 
     if (!has_event && cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) {
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index 2106c29c3a0..00fb7e23100 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -1489,10 +1489,10 @@ static void whpx_vcpu_pre_run(CPUState *cpu)
     if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
         if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) &&
             !(env->hflags & HF_SMM_MASK)) {
-            cpu->exit_request = 1;
+            qatomic_set(&cpu->exit_request, true);
         }
         if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) {
-            cpu->exit_request = 1;
+            qatomic_set(&cpu->exit_request, true);
         }
     }
 
@@ -1539,7 +1539,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu)
     if (tpr != vcpu->tpr) {
         vcpu->tpr = tpr;
         reg_values[reg_count].Reg64 = tpr;
-        cpu->exit_request = 1;
+        qatomic_set(&cpu->exit_request, true);
         reg_names[reg_count] = WHvX64RegisterCr8;
         reg_count += 1;
     }
-- 
2.51.0
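
For reference, here is a minimal stand-alone sketch of the protocol described
in the commit message and the new @exit_request documentation, written with
C11 atomics rather than QEMU's qatomic_read()/qatomic_set() helpers (which are
relaxed accesses); it is an illustration only, not QEMU code, and FakeCPU,
fake_cpu_exit() and fake_self_request() are invented names for the example:

/*
 * Illustrative sketch only -- not QEMU code.  The cross-thread requester
 * (cf. cpu_exit()) pairs a store-release with the CPU thread's
 * load-acquire; the same-thread "bump" case only needs a relaxed store.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>

typedef struct {
    atomic_bool exit_request;   /* analogous to CPUState::exit_request */
    int pending_work;           /* state published before the request  */
} FakeCPU;

/* Another thread asks the vCPU to drop to its slow path (cf. cpu_exit()). */
void fake_cpu_exit(FakeCPU *cpu, int work)
{
    cpu->pending_work = work;
    /* Store-release: pending_work is visible once exit_request is seen. */
    atomic_store_explicit(&cpu->exit_request, true, memory_order_release);
    /* The real code would also kick the thread out of the guest here. */
}

/*
 * Same-thread "bump" (e.g. a pre_run hook, or a thread function setting
 * exit_request at startup): writer and reader are the same thread, so a
 * relaxed store is enough.
 */
void fake_self_request(FakeCPU *cpu)
{
    atomic_store_explicit(&cpu->exit_request, true, memory_order_relaxed);
}

static void *vcpu_thread(void *opaque)
{
    FakeCPU *cpu = opaque;

    for (;;) {
        usleep(1000);           /* stand-in for running the guest */

        /* Load-acquire pairs with the store-release in fake_cpu_exit(). */
        if (atomic_load_explicit(&cpu->exit_request, memory_order_acquire)) {
            atomic_store_explicit(&cpu->exit_request, false,
                                  memory_order_relaxed);
            /* Safe to read: ordered after the acquire saw the request. */
            printf("vcpu: slow path, pending_work=%d\n", cpu->pending_work);
            return NULL;
        }
    }
}

int main(void)
{
    FakeCPU cpu = { .exit_request = false, .pending_work = 0 };
    pthread_t tid;

    pthread_create(&tid, NULL, vcpu_thread, &cpu);
    usleep(5000);
    fake_cpu_exit(&cpu, 42);    /* cross-thread request */
    pthread_join(tid, NULL);
    return 0;
}

Build with something like "cc -pthread sketch.c".  The point of the sketch is
only the pairing: the requester publishes its state, then sets the flag with
release semantics, and the CPU thread's acquire load of the flag is what makes
that state safely readable on the slow path.
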


Re: [PATCH 10/18] accel: use atomic accesses for exit_request
Posted by Igor Mammedov 4 days, 14 hours ago
On Fri, 29 Aug 2025 17:31:07 +0200
Paolo Bonzini <pbonzini@redhat.com> wrote:

> CPU threads write exit_request as a "note to self" that they need to
> go out to a slow path.  This write happens outside the BQL and can be
> a data race with another thread's cpu_exit(); use atomic accesses
> consistently.
> 
> While at it, change the value written from int ("1") to bool ("true").
> 
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Reviewed-by: Peter Xu <peterx@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Igor Mammedov <imammedo@redhat.com>

> ---
>  include/hw/core/cpu.h             | 9 +++++++++
>  accel/kvm/kvm-all.c               | 2 +-
>  accel/tcg/tcg-accel-ops-mttcg.c   | 2 +-
>  accel/tcg/tcg-accel-ops-rr.c      | 4 ++--
>  hw/ppc/spapr_hcall.c              | 6 +++---
>  target/i386/kvm/kvm.c             | 6 +++---
>  target/i386/nvmm/nvmm-accel-ops.c | 2 +-
>  target/i386/nvmm/nvmm-all.c       | 2 +-
>  target/i386/whpx/whpx-all.c       | 6 +++---
>  9 files changed, 24 insertions(+), 15 deletions(-)
> 
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 8b57bcd92c9..338757e5254 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -422,6 +422,15 @@ struct qemu_work_item;
>   * valid under cpu_list_lock.
>   * @created: Indicates whether the CPU thread has been successfully created.
>   * @halt_cond: condition variable sleeping threads can wait on.
> + * @exit_request: Another thread requests the CPU to call qemu_wait_io_event().
> + *   Should be read only by CPU thread with load-acquire, to synchronize with
> + *   other threads' store-release operation.
> + *
> + *   In some cases, accelerator-specific code will write exit_request from
> + *   within the same thread, to "bump" the effect of qemu_cpu_kick() to
> + *   the one provided by cpu_exit(), especially when processing interrupt
> + *   flags.  In this case, the write and read happen in the same thread
> + *   and the write therefore can use qemu_atomic_set().
>   * @interrupt_request: Indicates a pending interrupt request.
>   *   Only used by system emulation.
>   * @halted: Nonzero if the CPU is in suspended state.
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index bd9e5e3886d..e4167d94b4f 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -3730,7 +3730,7 @@ int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
>      have_sigbus_pending = true;
>      pending_sigbus_addr = addr;
>      pending_sigbus_code = code;
> -    qatomic_set(&cpu->exit_request, 1);
> +    qatomic_set(&cpu->exit_request, true);
>      return 0;
>  #else
>      return 1;
> diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
> index 337b993d3da..b12b7a36b5d 100644
> --- a/accel/tcg/tcg-accel-ops-mttcg.c
> +++ b/accel/tcg/tcg-accel-ops-mttcg.c
> @@ -85,7 +85,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
>      qemu_guest_random_seed_thread_part2(cpu->random_seed);
>  
>      /* process any pending work */
> -    cpu->exit_request = 1;
> +    qatomic_set(&cpu->exit_request, true);
>  
>      do {
>          if (cpu_can_run(cpu)) {
> diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
> index 1e551e92d6d..c2468d15d4f 100644
> --- a/accel/tcg/tcg-accel-ops-rr.c
> +++ b/accel/tcg/tcg-accel-ops-rr.c
> @@ -212,7 +212,7 @@ static void *rr_cpu_thread_fn(void *arg)
>      cpu = first_cpu;
>  
>      /* process any pending work */
> -    cpu->exit_request = 1;
> +    qatomic_set(&cpu->exit_request, true);
>  
>      while (1) {
>          /* Only used for icount_enabled() */
> @@ -286,7 +286,7 @@ static void *rr_cpu_thread_fn(void *arg)
>          /* Does not need a memory barrier because a spurious wakeup is okay.  */
>          qatomic_set(&rr_current_cpu, NULL);
>  
> -        if (cpu && cpu->exit_request) {
> +        if (cpu && qatomic_read(&cpu->exit_request)) {
>              qatomic_set_mb(&cpu->exit_request, 0);
>          }
>  
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 1e936f35e44..51875e32a09 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -509,7 +509,7 @@ static target_ulong h_cede(PowerPCCPU *cpu, SpaprMachineState *spapr,
>      if (!cpu_has_work(cs)) {
>          cs->halted = 1;
>          cs->exception_index = EXCP_HLT;
> -        cs->exit_request = 1;
> +        qatomic_set(&cs->exit_request, true);
>          ppc_maybe_interrupt(env);
>      }
>  
> @@ -531,7 +531,7 @@ static target_ulong h_confer_self(PowerPCCPU *cpu)
>      }
>      cs->halted = 1;
>      cs->exception_index = EXCP_HALTED;
> -    cs->exit_request = 1;
> +    qatomic_set(&cs->exit_request, true);
>      ppc_maybe_interrupt(&cpu->env);
>  
>      return H_SUCCESS;
> @@ -624,7 +624,7 @@ static target_ulong h_confer(PowerPCCPU *cpu, SpaprMachineState *spapr,
>      }
>  
>      cs->exception_index = EXCP_YIELD;
> -    cs->exit_request = 1;
> +    qatomic_set(&cs->exit_request, true);
>      cpu_loop_exit(cs);
>  
>      return H_SUCCESS;
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 8420c4090ef..34e74f24470 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -5486,10 +5486,10 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
>      if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
>          if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) &&
>              !(env->hflags & HF_SMM_MASK)) {
> -            qatomic_set(&cpu->exit_request, 1);
> +            qatomic_set(&cpu->exit_request, true);
>          }
>          if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) {
> -            qatomic_set(&cpu->exit_request, 1);
> +            qatomic_set(&cpu->exit_request, true);
>          }
>      }
>  
> @@ -5604,7 +5604,7 @@ int kvm_arch_process_async_events(CPUState *cs)
>          if (env->exception_nr == EXCP08_DBLE) {
>              /* this means triple fault */
>              qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
> -            cs->exit_request = 1;
> +            qatomic_set(&cs->exit_request, true);
>              return 0;
>          }
>          kvm_queue_exception(env, EXCP12_MCHK, 0, 0);
> diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c
> index 3799260bbde..86869f133e9 100644
> --- a/target/i386/nvmm/nvmm-accel-ops.c
> +++ b/target/i386/nvmm/nvmm-accel-ops.c
> @@ -77,7 +77,7 @@ static void nvmm_start_vcpu_thread(CPUState *cpu)
>   */
>  static void nvmm_kick_vcpu_thread(CPUState *cpu)
>  {
> -    cpu->exit_request = 1;
> +    qatomic_set(&cpu->exit_request, true);
>      cpus_kick_thread(cpu);
>  }
>  
> diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
> index 10bd51d9b59..7e36c42fbb4 100644
> --- a/target/i386/nvmm/nvmm-all.c
> +++ b/target/i386/nvmm/nvmm-all.c
> @@ -414,7 +414,7 @@ nvmm_vcpu_pre_run(CPUState *cpu)
>       * or commit pending TPR access.
>       */
>      if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
> -        cpu->exit_request = 1;
> +        qatomic_set(&cpu->exit_request, true);
>      }
>  
>      if (!has_event && cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) {
> diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
> index 2106c29c3a0..00fb7e23100 100644
> --- a/target/i386/whpx/whpx-all.c
> +++ b/target/i386/whpx/whpx-all.c
> @@ -1489,10 +1489,10 @@ static void whpx_vcpu_pre_run(CPUState *cpu)
>      if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
>          if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) &&
>              !(env->hflags & HF_SMM_MASK)) {
> -            cpu->exit_request = 1;
> +            qatomic_set(&cpu->exit_request, true);
>          }
>          if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) {
> -            cpu->exit_request = 1;
> +            qatomic_set(&cpu->exit_request, true);
>          }
>      }
>  
> @@ -1539,7 +1539,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu)
>      if (tpr != vcpu->tpr) {
>          vcpu->tpr = tpr;
>          reg_values[reg_count].Reg64 = tpr;
> -        cpu->exit_request = 1;
> +        qatomic_set(&cpu->exit_request, true);
>          reg_names[reg_count] = WHvX64RegisterCr8;
>          reg_count += 1;
>      }