[PATCH 2/4] ppc/pnv: Fix direct controls quiesce

Nicholas Piggin posted 4 patches 11 months, 3 weeks ago
[PATCH 2/4] ppc/pnv: Fix direct controls quiesce
Posted by Nicholas Piggin 11 months, 3 weeks ago
powernv CPUs have a set of control registers that can stop, start, and
do other things to control a thread's execution.

Using this interface to stop a thread puts it into a particular state
that can be queried, and is distinguishable from other things that
might stop the CPU (e.g., going idle, or being debugged via gdb, or
stopped by the monitor).

Add a new flag that can speficially distinguish this state where it
is stopped with control registers. This solves some hangs when
rebooting powernv machines when skiboot is modified to allow QEMU
to use the CPU control facility (that uses controls to bring all
secondaries to a known state).

Fixes: c8891955086 ("ppc/pnv: Implement POWER10 PC xscom registers for direct controls")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

There might still be a bigger issue with how we handle CPU stop
requests. Multiple different sources may want to stop a CPU, there
may be situations where one of them resumes a CPU before all agree?
A stop_request mask or refcount might be a nice way to consolidate
all these.
---
 target/ppc/cpu.h  | 1 +
 hw/ppc/pnv_core.c | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 945af07a64..0b4f1013b8 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1355,6 +1355,7 @@ struct CPUArchState {
      * special way (such as routing some resume causes to 0x100, i.e. sreset).
      */
     bool resume_as_sreset;
+    bool quiesced;
 #endif
 
     /* These resources are used only in TCG */
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index a30693990b..cbfac49862 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -217,8 +217,8 @@ static uint64_t pnv_core_power10_xscom_read(void *opaque, hwaddr addr,
     case PNV10_XSCOM_EC_CORE_RAS_STATUS:
         for (i = 0; i < nr_threads; i++) {
             PowerPCCPU *cpu = pc->threads[i];
-            CPUState *cs = CPU(cpu);
-            if (cs->stopped) {
+            CPUPPCState *env = &cpu->env;
+            if (env->quiesced) {
                 val |= PPC_BIT(0 + 8 * i) | PPC_BIT(1 + 8 * i);
             }
         }
@@ -244,20 +244,25 @@ static void pnv_core_power10_xscom_write(void *opaque, hwaddr addr,
         for (i = 0; i < nr_threads; i++) {
             PowerPCCPU *cpu = pc->threads[i];
             CPUState *cs = CPU(cpu);
+            CPUPPCState *env = &cpu->env;
 
             if (val & PPC_BIT(7 + 8 * i)) { /* stop */
                 val &= ~PPC_BIT(7 + 8 * i);
                 cpu_pause(cs);
+                env->quiesced = true;
             }
             if (val & PPC_BIT(6 + 8 * i)) { /* start */
                 val &= ~PPC_BIT(6 + 8 * i);
+                env->quiesced = false;
                 cpu_resume(cs);
             }
             if (val & PPC_BIT(4 + 8 * i)) { /* sreset */
                 val &= ~PPC_BIT(4 + 8 * i);
+                env->quiesced = false;
                 pnv_cpu_do_nmi_resume(cs);
             }
             if (val & PPC_BIT(3 + 8 * i)) { /* clear maint */
+                env->quiesced = false;
                 /*
                  * Hardware has very particular cases for where clear maint
                  * must be used and where start must be used to resume a
-- 
2.45.2
Re: [PATCH 2/4] ppc/pnv: Fix direct controls quiesce
Posted by Miles Glenn 11 months, 3 weeks ago
Reviewed-by: Glenn Miles <milesg@linux.ibm.com>

On Mon, 2024-11-25 at 23:20 +1000, Nicholas Piggin wrote:
> powernv CPUs have a set of control registers that can stop, start,
> and
> do other things to control a thread's execution.
> 
> Using this interface to stop a thread puts it into a particular state
> that can be queried, and is distinguishable from other things that
> might stop the CPU (e.g., going idle, or being debugged via gdb, or
> stopped by the monitor).
> 
> Add a new flag that can speficially distinguish this state where it
> is stopped with control registers. This solves some hangs when
> rebooting powernv machines when skiboot is modified to allow QEMU
> to use the CPU control facility (that uses controls to bring all
> secondaries to a known state).
> 
> Fixes: c8891955086 ("ppc/pnv: Implement POWER10 PC xscom registers
> for direct controls")
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> 
> There might still be a bigger issue with how we handle CPU stop
> requests. Multiple different sources may want to stop a CPU, there
> may be situations where one of them resumes a CPU before all agree?
> A stop_request mask or refcount might be a nice way to consolidate
> all these.
> ---
>  target/ppc/cpu.h  | 1 +
>  hw/ppc/pnv_core.c | 9 +++++++--
>  2 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index 945af07a64..0b4f1013b8 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -1355,6 +1355,7 @@ struct CPUArchState {
>       * special way (such as routing some resume causes to 0x100,
> i.e. sreset).
>       */
>      bool resume_as_sreset;
> +    bool quiesced;
>  #endif
>  
>      /* These resources are used only in TCG */
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> index a30693990b..cbfac49862 100644
> --- a/hw/ppc/pnv_core.c
> +++ b/hw/ppc/pnv_core.c
> @@ -217,8 +217,8 @@ static uint64_t pnv_core_power10_xscom_read(void
> *opaque, hwaddr addr,
>      case PNV10_XSCOM_EC_CORE_RAS_STATUS:
>          for (i = 0; i < nr_threads; i++) {
>              PowerPCCPU *cpu = pc->threads[i];
> -            CPUState *cs = CPU(cpu);
> -            if (cs->stopped) {
> +            CPUPPCState *env = &cpu->env;
> +            if (env->quiesced) {
>                  val |= PPC_BIT(0 + 8 * i) | PPC_BIT(1 + 8 * i);
>              }
>          }
> @@ -244,20 +244,25 @@ static void pnv_core_power10_xscom_write(void
> *opaque, hwaddr addr,
>          for (i = 0; i < nr_threads; i++) {
>              PowerPCCPU *cpu = pc->threads[i];
>              CPUState *cs = CPU(cpu);
> +            CPUPPCState *env = &cpu->env;
>  
>              if (val & PPC_BIT(7 + 8 * i)) { /* stop */
>                  val &= ~PPC_BIT(7 + 8 * i);
>                  cpu_pause(cs);
> +                env->quiesced = true;
>              }
>              if (val & PPC_BIT(6 + 8 * i)) { /* start */
>                  val &= ~PPC_BIT(6 + 8 * i);
> +                env->quiesced = false;
>                  cpu_resume(cs);
>              }
>              if (val & PPC_BIT(4 + 8 * i)) { /* sreset */
>                  val &= ~PPC_BIT(4 + 8 * i);
> +                env->quiesced = false;
>                  pnv_cpu_do_nmi_resume(cs);
>              }
>              if (val & PPC_BIT(3 + 8 * i)) { /* clear maint */
> +                env->quiesced = false;
>                  /*
>                   * Hardware has very particular cases for where
> clear maint
>                   * must be used and where start must be used to
> resume a