[PULL 20/30] accel/hvf: Implement WFI without using pselect()

Philippe Mathieu-Daudé posted 30 patches 3 weeks ago
Maintainers: "Alex Bennée" <alex.bennee@linaro.org>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Thomas Huth <thuth@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>, Cameron Esfahani <dirty@apple.com>, Roman Bolshakov <rbolshakov@ddn.com>, Phil Dennis-Jordan <phil@philjordan.eu>, Mads Ynddal <mads@ynddal.dk>, "Dr. David Alan Gilbert" <dave@treblig.org>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, Hyman Huang <yong.huang@smartx.com>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Peter Maydell <peter.maydell@linaro.org>, Alexander Graf <agraf@csgraf.de>, Reinoud Zandijk <reinoud@netbsd.org>, Kohei Tokunaga <ktokunaga.mail@gmail.com>, Laurent Vivier <lvivier@redhat.com>
[PULL 20/30] accel/hvf: Implement WFI without using pselect()
Posted by Philippe Mathieu-Daudé 3 weeks ago
Return to the main loop where we'll be waken again.
This avoid a tricky race with signals introduced in
commit 219c101fa7f ("Add HVF WFI handler").

Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
Message-ID: <20260112103034.65310-14-philmd@linaro.org>
---
 include/system/hvf_int.h  |  1 -
 accel/hvf/hvf-accel-ops.c |  2 --
 target/arm/hvf/hvf.c      | 74 +++------------------------------------
 3 files changed, 5 insertions(+), 72 deletions(-)

diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h
index d842d4b2b99..c8e407a1463 100644
--- a/include/system/hvf_int.h
+++ b/include/system/hvf_int.h
@@ -47,7 +47,6 @@ struct AccelCPUState {
 #ifdef __aarch64__
     hv_vcpu_exit_t *exit;
     bool vtimer_masked;
-    sigset_t unblock_ipi_mask;
     bool guest_debug_enabled;
 #endif
 };
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index d931412975c..ffcfe9663b5 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -135,8 +135,6 @@ static int hvf_init_vcpu(CPUState *cpu)
     sigaction(SIG_IPI, &sigact, NULL);
 
 #ifdef __aarch64__
-    pthread_sigmask(SIG_BLOCK, NULL, &cpu->accel->unblock_ipi_mask);
-    sigdelset(&cpu->accel->unblock_ipi_mask, SIG_IPI);
     cpu->accel->guest_debug_enabled = false;
 
     r = hv_vcpu_create(&cpu->accel->fd,
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index d74703a3d55..b936098d257 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -301,7 +301,7 @@ void hvf_arm_init_debug(void)
 #define TMR_CTL_IMASK   (1 << 1)
 #define TMR_CTL_ISTATUS (1 << 2)
 
-static void hvf_wfi(CPUState *cpu);
+static int hvf_wfi(CPUState *cpu);
 
 static uint32_t chosen_ipa_bit_size;
 
@@ -1703,81 +1703,17 @@ static uint64_t hvf_vtimer_val_raw(void)
     return mach_absolute_time() - hvf_state->vtimer_offset;
 }
 
-static uint64_t hvf_vtimer_val(void)
+static int hvf_wfi(CPUState *cpu)
 {
-    if (!runstate_is_running()) {
-        /* VM is paused, the vtimer value is in vtimer.vtimer_val */
-        return vtimer.vtimer_val;
-    }
-
-    return hvf_vtimer_val_raw();
-}
-
-static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
-{
-    /*
-     * Use pselect to sleep so that other threads can IPI us while we're
-     * sleeping.
-     */
-    qatomic_set_mb(&cpu->thread_kicked, false);
-    bql_unlock();
-    pselect(0, 0, 0, 0, ts, &cpu->accel->unblock_ipi_mask);
-    bql_lock();
-}
-
-static void hvf_wfi(CPUState *cpu)
-{
-    ARMCPU *arm_cpu = ARM_CPU(cpu);
-    struct timespec ts;
-    hv_return_t r;
-    uint64_t ctl;
-    uint64_t cval;
-    int64_t ticks_to_sleep;
-    uint64_t seconds;
-    uint64_t nanos;
-    uint32_t cntfrq;
-
     if (cpu_has_work(cpu)) {
         /*
          * Don't bother to go into our "low power state" if
          * we would just wake up immediately.
          */
-        return;
+        return 0;
     }
 
-    r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
-    assert_hvf_ok(r);
-
-    if (!(ctl & 1) || (ctl & 2)) {
-        /* Timer disabled or masked, just wait for an IPI. */
-        hvf_wait_for_ipi(cpu, NULL);
-        return;
-    }
-
-    r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
-    assert_hvf_ok(r);
-
-    ticks_to_sleep = cval - hvf_vtimer_val();
-    if (ticks_to_sleep < 0) {
-        return;
-    }
-
-    cntfrq = gt_cntfrq_period_ns(arm_cpu);
-    seconds = muldiv64(ticks_to_sleep, cntfrq, NANOSECONDS_PER_SECOND);
-    ticks_to_sleep -= muldiv64(seconds, NANOSECONDS_PER_SECOND, cntfrq);
-    nanos = ticks_to_sleep * cntfrq;
-
-    /*
-     * Don't sleep for less than the time a context switch would take,
-     * so that we can satisfy fast timer requests on the same CPU.
-     * Measurements on M1 show the sweet spot to be ~2ms.
-     */
-    if (!seconds && nanos < (2 * SCALE_MS)) {
-        return;
-    }
-
-    ts = (struct timespec) { seconds, nanos };
-    hvf_wait_for_ipi(cpu, &ts);
+    return EXCP_HLT;
 }
 
 /* Must be called by the owning thread */
@@ -1967,7 +1903,7 @@ static int hvf_handle_exception(CPUState *cpu, hv_vcpu_exit_exception_t *excp)
     case EC_WFX_TRAP:
         advance_pc = true;
         if (!(syndrome & WFX_IS_WFE)) {
-            hvf_wfi(cpu);
+            ret = hvf_wfi(cpu);
         }
         break;
     case EC_AA64_HVC:
-- 
2.52.0


Re: [PULL 20/30] accel/hvf: Implement WFI without using pselect()
Posted by Zenghui Yu 6 days, 13 hours ago
On 1/16/26 7:17 PM, Philippe Mathieu-Daudé wrote:
> Return to the main loop where we'll be waken again.
> This avoid a tricky race with signals introduced in
> commit 219c101fa7f ("Add HVF WFI handler").
> 
> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Reviewed-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> Message-ID: <20260112103034.65310-14-philmd@linaro.org>
> ---
>  include/system/hvf_int.h  |  1 -
>  accel/hvf/hvf-accel-ops.c |  2 --
>  target/arm/hvf/hvf.c      | 74 +++------------------------------------
>  3 files changed, 5 insertions(+), 72 deletions(-)
> 
> diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h
> index d842d4b2b99..c8e407a1463 100644
> --- a/include/system/hvf_int.h
> +++ b/include/system/hvf_int.h
> @@ -47,7 +47,6 @@ struct AccelCPUState {
>  #ifdef __aarch64__
>      hv_vcpu_exit_t *exit;
>      bool vtimer_masked;
> -    sigset_t unblock_ipi_mask;
>      bool guest_debug_enabled;
>  #endif
>  };
> diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
> index d931412975c..ffcfe9663b5 100644
> --- a/accel/hvf/hvf-accel-ops.c
> +++ b/accel/hvf/hvf-accel-ops.c
> @@ -135,8 +135,6 @@ static int hvf_init_vcpu(CPUState *cpu)
>      sigaction(SIG_IPI, &sigact, NULL);
>  
>  #ifdef __aarch64__
> -    pthread_sigmask(SIG_BLOCK, NULL, &cpu->accel->unblock_ipi_mask);
> -    sigdelset(&cpu->accel->unblock_ipi_mask, SIG_IPI);
>      cpu->accel->guest_debug_enabled = false;
>  
>      r = hv_vcpu_create(&cpu->accel->fd,
> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> index d74703a3d55..b936098d257 100644
> --- a/target/arm/hvf/hvf.c
> +++ b/target/arm/hvf/hvf.c
> @@ -301,7 +301,7 @@ void hvf_arm_init_debug(void)
>  #define TMR_CTL_IMASK   (1 << 1)
>  #define TMR_CTL_ISTATUS (1 << 2)
>  
> -static void hvf_wfi(CPUState *cpu);
> +static int hvf_wfi(CPUState *cpu);
>  
>  static uint32_t chosen_ipa_bit_size;
>  
> @@ -1703,81 +1703,17 @@ static uint64_t hvf_vtimer_val_raw(void)
>      return mach_absolute_time() - hvf_state->vtimer_offset;
>  }
>  
> -static uint64_t hvf_vtimer_val(void)
> +static int hvf_wfi(CPUState *cpu)
>  {
> -    if (!runstate_is_running()) {
> -        /* VM is paused, the vtimer value is in vtimer.vtimer_val */
> -        return vtimer.vtimer_val;
> -    }
> -
> -    return hvf_vtimer_val_raw();
> -}
> -
> -static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> -{
> -    /*
> -     * Use pselect to sleep so that other threads can IPI us while we're
> -     * sleeping.
> -     */
> -    qatomic_set_mb(&cpu->thread_kicked, false);
> -    bql_unlock();
> -    pselect(0, 0, 0, 0, ts, &cpu->accel->unblock_ipi_mask);
> -    bql_lock();
> -}
> -
> -static void hvf_wfi(CPUState *cpu)
> -{
> -    ARMCPU *arm_cpu = ARM_CPU(cpu);
> -    struct timespec ts;
> -    hv_return_t r;
> -    uint64_t ctl;
> -    uint64_t cval;
> -    int64_t ticks_to_sleep;
> -    uint64_t seconds;
> -    uint64_t nanos;
> -    uint32_t cntfrq;
> -
>      if (cpu_has_work(cpu)) {
>          /*
>           * Don't bother to go into our "low power state" if
>           * we would just wake up immediately.
>           */
> -        return;
> +        return 0;
>      }
>  
> -    r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
> -    assert_hvf_ok(r);
> -
> -    if (!(ctl & 1) || (ctl & 2)) {
> -        /* Timer disabled or masked, just wait for an IPI. */
> -        hvf_wait_for_ipi(cpu, NULL);
> -        return;
> -    }
> -
> -    r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
> -    assert_hvf_ok(r);
> -
> -    ticks_to_sleep = cval - hvf_vtimer_val();
> -    if (ticks_to_sleep < 0) {
> -        return;
> -    }
> -
> -    cntfrq = gt_cntfrq_period_ns(arm_cpu);
> -    seconds = muldiv64(ticks_to_sleep, cntfrq, NANOSECONDS_PER_SECOND);
> -    ticks_to_sleep -= muldiv64(seconds, NANOSECONDS_PER_SECOND, cntfrq);
> -    nanos = ticks_to_sleep * cntfrq;
> -
> -    /*
> -     * Don't sleep for less than the time a context switch would take,
> -     * so that we can satisfy fast timer requests on the same CPU.
> -     * Measurements on M1 show the sweet spot to be ~2ms.
> -     */
> -    if (!seconds && nanos < (2 * SCALE_MS)) {
> -        return;
> -    }
> -
> -    ts = (struct timespec) { seconds, nanos };
> -    hvf_wait_for_ipi(cpu, &ts);
> +    return EXCP_HLT;
>  }
>  
>  /* Must be called by the owning thread */
> @@ -1967,7 +1903,7 @@ static int hvf_handle_exception(CPUState *cpu, hv_vcpu_exit_exception_t *excp)
>      case EC_WFX_TRAP:
>          advance_pc = true;
>          if (!(syndrome & WFX_IS_WFE)) {
> -            hvf_wfi(cpu);
> +            ret = hvf_wfi(cpu);
>          }
>          break;
>      case EC_AA64_HVC:

Running a Linux guest on M1 (macOS Sequoia 15.7.2) results in a high CPU
utilization - from host's perspective. The guest itself is almost idle.

I traced hvf_wfi() and noticed that the frequency of WFI traps is
extremely high. I *guess* that the vcpus are being woken up frequently,
which is unnecessary most of the time.

I'm not sure if this is expected.

Thanks,
Zenghui