[patch V4 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup

Thomas Gleixner posted 37 patches 11 months, 4 weeks ago
Failed in applying to current master (apply log)
There is a newer version of this series
[patch V4 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup
Posted by Thomas Gleixner 11 months, 4 weeks ago
Hi!

This is version 4 of the reworked parallel bringup series. Version 3 can be
found here:

   https://lore.kernel.org/lkml/20230508181633.089804905@linutronix.de

This is just a reiteration to address the following details:

  1) Address review feedback (Peter Zijlstra)

  2) Fix a MIPS related build problem (0day)

Other than that there are no changes and the other details are all the same
as in V3 and V2.

It's also available from git:

    git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git hotplug

Diff to V3 below.

Thanks,

	tglx
---
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index f5e0f4235746..90c71d800b59 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -690,7 +690,7 @@ void flush_tlb_one(unsigned long vaddr)
 EXPORT_SYMBOL(flush_tlb_page);
 EXPORT_SYMBOL(flush_tlb_one);
 
-#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
 void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 {
 	if (mp_ops->cleanup_dead_cpu)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0438802031c3..9cd77d319555 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -290,8 +290,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 
 	/*  APIC ID not found in the table. Drop the trampoline lock and bail. */
 	movq	trampoline_lock(%rip), %rax
-	lock
-	btrl	$0, (%rax)
+	movl	$0, (%rax)
 
 1:	cli
 	hlt
@@ -320,8 +319,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	movq	trampoline_lock(%rip), %rax
 	testq	%rax, %rax
 	jz	.Lsetup_gdt
-	lock
-	btrl	$0, (%rax)
+	movl	$0, (%rax)
 
 .Lsetup_gdt:
 	/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5caf4897b507..660709e94823 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -161,31 +161,28 @@ static inline void smpboot_restore_warm_reset_vector(void)
 
 }
 
-/*
- * Report back to the Boot Processor during boot time or to the caller processor
- * during CPU online.
- */
-static void smp_callin(void)
+/* Run the next set of setup steps for the upcoming CPU */
+static void ap_starting(void)
 {
 	int cpuid = smp_processor_id();
 
 	/*
-	 * If waken up by an INIT in an 82489DX configuration the alive
-	 * synchronization guarantees we don't get here before an
-	 * INIT_deassert IPI reaches our local APIC, so it is now safe to
-	 * touch our local APIC.
+	 * If woken up by an INIT in an 82489DX configuration the alive
+	 * synchronization guarantees that the CPU does not reach this
+	 * point before an INIT_deassert IPI reaches the local APIC, so it
+	 * is now safe to touch the local APIC.
 	 *
 	 * Set up this CPU, first the APIC, which is probably redundant on
 	 * most boards.
 	 */
 	apic_ap_setup();
 
-	/* Save our processor parameters. */
+	/* Save the processor parameters. */
 	smp_store_cpu_info(cpuid);
 
 	/*
 	 * The topology information must be up to date before
-	 * calibrate_delay() and notify_cpu_starting().
+	 * notify_cpu_starting().
 	 */
 	set_cpu_sibling_map(cpuid);
 
@@ -197,7 +194,7 @@ static void smp_callin(void)
 
 	/*
 	 * This runs the AP through all the cpuhp states to its target
-	 * state (CPUHP_ONLINE in the case of serial bringup).
+	 * state CPUHP_ONLINE.
 	 */
 	notify_cpu_starting(cpuid);
 }
@@ -274,10 +271,7 @@ static void notrace start_secondary(void *unused)
 	rcu_cpu_starting(raw_smp_processor_id());
 	x86_cpuinit.early_percpu_clock_init();
 
-	smp_callin();
-
-	/* Otherwise gcc will move up smp_processor_id() before cpu_init() */
-	barrier();
+	ap_starting();
 
 	/* Check TSC synchronization with the control CPU. */
 	check_tsc_sync_target();
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index 2dfb1c400167..c6de4deec746 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -40,17 +40,13 @@
 .macro LOAD_REALMODE_ESP
 	/*
 	 * Make sure only one CPU fiddles with the realmode stack
-	 */
+	*/
 .Llock_rm\@:
-	btl	$0, tr_lock
-	jnc	2f
-	pause
-	jmp	.Llock_rm\@
+        lock btsl       $0, tr_lock
+        jnc             2f
+        pause
+        jmp             .Llock_rm\@
 2:
-	lock
-	btsl	$0, tr_lock
-	jc	.Llock_rm\@
-
 	# Setup stack
 	movl	$rm_stack_end, %esp
 .endm
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 60b4093fae9e..005f863a3d2b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -294,14 +294,14 @@ enum cpuhp_sync_state {
  * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
  * @state:	The synchronization state to set
  *
- * No synchronization point. Just update of the synchronization state.
+ * No synchronization point. Just update of the synchronization state, but implies
+ * a full barrier so that the AP changes are visible before the control CPU proceeds.
  */
 static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
 {
 	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
-	int sync = atomic_read(st);
 
-	while (!atomic_try_cmpxchg(st, &sync, state));
+	(void)atomic_xchg(st, state);
 }
 
 void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
@@ -829,7 +829,11 @@ static int bringup_cpu(unsigned int cpu)
 	/*
 	 * Some architectures have to walk the irq descriptors to
 	 * setup the vector space for the cpu which comes online.
-	 * Prevent irq alloc/free across the bringup.
+	 *
+	 * Prevent irq alloc/free across the bringup by acquiring the
+	 * sparse irq lock. Hold it until the upcoming CPU completes the
+	 * startup in cpuhp_online_idle() which allows to avoid
+	 * intermediate synchronization points in the architecture code.
 	 */
 	irq_lock_sparse();
Re: [patch V4 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup
Posted by Guilherme G. Piccoli 11 months, 3 weeks ago
On 12/05/2023 18:06, Thomas Gleixner wrote:
> Hi!
> 
> This is version 4 of the reworked parallel bringup series. Version 3 can be
> found here:
> 
>    https://lore.kernel.org/lkml/20230508181633.089804905@linutronix.de


Hi Thomas, thanks for the series! I was able to test it on the Steam Deck
(on top of 6.4-rc2), and everything is working fine; also tested S3
suspend/resume, working as expected.

Some logs from boot time:


Parallel boot
[    0.239764] smp: Bringing up secondary CPUs ...
[...]
[    0.253130] smp: Brought up 1 node, 8 CPUs


Regular boot (with cpuhp.parallel=0)
[    0.240093] smp: Bringing up secondary CPUs ...
[...]
[    0.253475] smp: Brought up 1 node, 8 CPUs


Feel free to add (to the series):

Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com> # Steam Deck

Cheers,


Guilherme
Re: [patch V4 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup
Posted by Oleksandr Natalenko 11 months, 4 weeks ago
Hello.

On pátek 12. května 2023 23:06:56 CEST Thomas Gleixner wrote:
> Hi!
> 
> This is version 4 of the reworked parallel bringup series. Version 3 can be
> found here:
> 
>    https://lore.kernel.org/lkml/20230508181633.089804905@linutronix.de
> 
> This is just a reiteration to address the following details:
> 
>   1) Address review feedback (Peter Zijlstra)
> 
>   2) Fix a MIPS related build problem (0day)
> 
> Other than that there are no changes and the other details are all the same
> as in V3 and V2.
> 
> It's also available from git:
> 
>     git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git hotplug
> 
> Diff to V3 below.
> 
> Thanks,
> 
> 	tglx

With this patchset:

```

[    0.137719] smpboot: Allowing 32 CPUs, 0 hotplug CPUs
[    0.777312] smpboot: CPU0: AMD Ryzen 9 5950X 16-Core Processor (family: 0x19, model: 0x21, stepping: 0x2)
[    0.777896] smpboot: Parallel CPU startup enabled: 0x80000000
```

Seems to survive suspend/resume cycle too.

Hence:

Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>

Thanks.

> ---
> diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
> index f5e0f4235746..90c71d800b59 100644
> --- a/arch/mips/kernel/smp.c
> +++ b/arch/mips/kernel/smp.c
> @@ -690,7 +690,7 @@ void flush_tlb_one(unsigned long vaddr)
>  EXPORT_SYMBOL(flush_tlb_page);
>  EXPORT_SYMBOL(flush_tlb_one);
>  
> -#ifdef CONFIG_HOTPLUG_CPU
> +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
>  void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
>  {
>  	if (mp_ops->cleanup_dead_cpu)
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 0438802031c3..9cd77d319555 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -290,8 +290,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
>  
>  	/*  APIC ID not found in the table. Drop the trampoline lock and bail. */
>  	movq	trampoline_lock(%rip), %rax
> -	lock
> -	btrl	$0, (%rax)
> +	movl	$0, (%rax)
>  
>  1:	cli
>  	hlt
> @@ -320,8 +319,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
>  	movq	trampoline_lock(%rip), %rax
>  	testq	%rax, %rax
>  	jz	.Lsetup_gdt
> -	lock
> -	btrl	$0, (%rax)
> +	movl	$0, (%rax)
>  
>  .Lsetup_gdt:
>  	/*
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 5caf4897b507..660709e94823 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -161,31 +161,28 @@ static inline void smpboot_restore_warm_reset_vector(void)
>  
>  }
>  
> -/*
> - * Report back to the Boot Processor during boot time or to the caller processor
> - * during CPU online.
> - */
> -static void smp_callin(void)
> +/* Run the next set of setup steps for the upcoming CPU */
> +static void ap_starting(void)
>  {
>  	int cpuid = smp_processor_id();
>  
>  	/*
> -	 * If waken up by an INIT in an 82489DX configuration the alive
> -	 * synchronization guarantees we don't get here before an
> -	 * INIT_deassert IPI reaches our local APIC, so it is now safe to
> -	 * touch our local APIC.
> +	 * If woken up by an INIT in an 82489DX configuration the alive
> +	 * synchronization guarantees that the CPU does not reach this
> +	 * point before an INIT_deassert IPI reaches the local APIC, so it
> +	 * is now safe to touch the local APIC.
>  	 *
>  	 * Set up this CPU, first the APIC, which is probably redundant on
>  	 * most boards.
>  	 */
>  	apic_ap_setup();
>  
> -	/* Save our processor parameters. */
> +	/* Save the processor parameters. */
>  	smp_store_cpu_info(cpuid);
>  
>  	/*
>  	 * The topology information must be up to date before
> -	 * calibrate_delay() and notify_cpu_starting().
> +	 * notify_cpu_starting().
>  	 */
>  	set_cpu_sibling_map(cpuid);
>  
> @@ -197,7 +194,7 @@ static void smp_callin(void)
>  
>  	/*
>  	 * This runs the AP through all the cpuhp states to its target
> -	 * state (CPUHP_ONLINE in the case of serial bringup).
> +	 * state CPUHP_ONLINE.
>  	 */
>  	notify_cpu_starting(cpuid);
>  }
> @@ -274,10 +271,7 @@ static void notrace start_secondary(void *unused)
>  	rcu_cpu_starting(raw_smp_processor_id());
>  	x86_cpuinit.early_percpu_clock_init();
>  
> -	smp_callin();
> -
> -	/* Otherwise gcc will move up smp_processor_id() before cpu_init() */
> -	barrier();
> +	ap_starting();
>  
>  	/* Check TSC synchronization with the control CPU. */
>  	check_tsc_sync_target();
> diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
> index 2dfb1c400167..c6de4deec746 100644
> --- a/arch/x86/realmode/rm/trampoline_64.S
> +++ b/arch/x86/realmode/rm/trampoline_64.S
> @@ -40,17 +40,13 @@
>  .macro LOAD_REALMODE_ESP
>  	/*
>  	 * Make sure only one CPU fiddles with the realmode stack
> -	 */
> +	*/
>  .Llock_rm\@:
> -	btl	$0, tr_lock
> -	jnc	2f
> -	pause
> -	jmp	.Llock_rm\@
> +        lock btsl       $0, tr_lock
> +        jnc             2f
> +        pause
> +        jmp             .Llock_rm\@
>  2:
> -	lock
> -	btsl	$0, tr_lock
> -	jc	.Llock_rm\@
> -
>  	# Setup stack
>  	movl	$rm_stack_end, %esp
>  .endm
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> index 60b4093fae9e..005f863a3d2b 100644
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -294,14 +294,14 @@ enum cpuhp_sync_state {
>   * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
>   * @state:	The synchronization state to set
>   *
> - * No synchronization point. Just update of the synchronization state.
> + * No synchronization point. Just update of the synchronization state, but implies
> + * a full barrier so that the AP changes are visible before the control CPU proceeds.
>   */
>  static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
>  {
>  	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
> -	int sync = atomic_read(st);
>  
> -	while (!atomic_try_cmpxchg(st, &sync, state));
> +	(void)atomic_xchg(st, state);
>  }
>  
>  void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
> @@ -829,7 +829,11 @@ static int bringup_cpu(unsigned int cpu)
>  	/*
>  	 * Some architectures have to walk the irq descriptors to
>  	 * setup the vector space for the cpu which comes online.
> -	 * Prevent irq alloc/free across the bringup.
> +	 *
> +	 * Prevent irq alloc/free across the bringup by acquiring the
> +	 * sparse irq lock. Hold it until the upcoming CPU completes the
> +	 * startup in cpuhp_online_idle() which allows to avoid
> +	 * intermediate synchronization points in the architecture code.
>  	 */
>  	irq_lock_sparse();
>  
> 
> 
> 


-- 
Oleksandr Natalenko (post-factum)
Re: [patch V4 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup
Posted by Helge Deller 11 months, 3 weeks ago
Hi Thomas,
> On pátek 12. května 2023 23:06:56 CEST Thomas Gleixner wrote:
>> This is version 4 of the reworked parallel bringup series. Version 3 can be
>> found here:
>>
>>     https://lore.kernel.org/lkml/20230508181633.089804905@linutronix.de
>> ...
>>
>> Other than that there are no changes and the other details are all the same
>> as in V3 and V2.
>>
>> It's also available from git:
>>
>>      git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git hotplug

I tested your series on the parisc architecture just to make sure that it still works
with your patch applied.
On parisc the CPU bringup happens later in the boot process (after the inventory),
so your patch won't have a direct impact anyway.
But at least everything still works, incl. manual CPU enable/disable.

So, you may add
Tested-by: Helge Deller <deller@gmx.de> # parisc

Thanks!
Helge
[patch V4 01/37] x86/smpboot: Cleanup topology_phys_to_logical_pkg()/die()
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Make topology_phys_to_logical_pkg_die() static as it's only used in
smpboot.c and fixup the kernel-doc warnings for both functions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/include/asm/topology.h |    3 ---
 arch/x86/kernel/smpboot.c       |   10 ++++++----
 2 files changed, 6 insertions(+), 7 deletions(-)
---

--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -139,7 +139,6 @@ static inline int topology_max_smt_threa
 int topology_update_package_map(unsigned int apicid, unsigned int cpu);
 int topology_update_die_map(unsigned int dieid, unsigned int cpu);
 int topology_phys_to_logical_pkg(unsigned int pkg);
-int topology_phys_to_logical_die(unsigned int die, unsigned int cpu);
 bool topology_is_primary_thread(unsigned int cpu);
 bool topology_smt_supported(void);
 #else
@@ -149,8 +148,6 @@ topology_update_package_map(unsigned int
 static inline int
 topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
 static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_phys_to_logical_die(unsigned int die,
-		unsigned int cpu) { return 0; }
 static inline int topology_max_die_per_package(void) { return 1; }
 static inline int topology_max_smt_threads(void) { return 1; }
 static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -288,6 +288,7 @@ bool topology_smt_supported(void)
 
 /**
  * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ * @phys_pkg:	The physical package id to map
  *
  * Returns logical package id or -1 if not found
  */
@@ -304,15 +305,17 @@ int topology_phys_to_logical_pkg(unsigne
 	return -1;
 }
 EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
 /**
  * topology_phys_to_logical_die - Map a physical die id to logical
+ * @die_id:	The physical die id to map
+ * @cur_cpu:	The CPU for which the mapping is done
  *
  * Returns logical die id or -1 if not found
  */
-int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
+static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
 {
-	int cpu;
-	int proc_id = cpu_data(cur_cpu).phys_proc_id;
+	int cpu, proc_id = cpu_data(cur_cpu).phys_proc_id;
 
 	for_each_possible_cpu(cpu) {
 		struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -323,7 +326,6 @@ int topology_phys_to_logical_die(unsigne
 	}
 	return -1;
 }
-EXPORT_SYMBOL(topology_phys_to_logical_die);
 
 /**
  * topology_update_package_map - Update the physical to logical package map
[patch V4 02/37] cpu/hotplug: Mark arch_disable_smp_support() and bringup_nonboot_cpus() __init
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

No point in keeping them around.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/kernel/smpboot.c |    4 ++--
 kernel/cpu.c              |    2 +-
 kernel/smp.c              |    2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)


--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1269,9 +1269,9 @@ int native_cpu_up(unsigned int cpu, stru
 }
 
 /**
- * arch_disable_smp_support() - disables SMP support for x86 at runtime
+ * arch_disable_smp_support() - Disables SMP support for x86 at boottime
  */
-void arch_disable_smp_support(void)
+void __init arch_disable_smp_support(void)
 {
 	disable_ioapic_support();
 }
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1502,7 +1502,7 @@ int bringup_hibernate_cpu(unsigned int s
 	return 0;
 }
 
-void bringup_nonboot_cpus(unsigned int setup_max_cpus)
+void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
 {
 	unsigned int cpu;
 
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -892,7 +892,7 @@ EXPORT_SYMBOL(setup_max_cpus);
  * SMP mode to <NUM>.
  */
 
-void __weak arch_disable_smp_support(void) { }
+void __weak __init arch_disable_smp_support(void) { }
 
 static int __init nosmp(char *str)
 {
[patch V4 03/37] x86/smpboot: Avoid pointless delay calibration if TSC is synchronized
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

When TSC is synchronized across sockets then there is no reason to
calibrate the delay for the first CPU which comes up on a socket.

Just reuse the existing calibration value.

This removes 100ms pointlessly wasted time from CPU hotplug per socket.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/kernel/smpboot.c |   40 +++++++++++++++++++++++++---------------
 arch/x86/kernel/tsc.c     |   20 ++++++++++++++++----
 2 files changed, 41 insertions(+), 19 deletions(-)


--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -178,28 +178,17 @@ static void smp_callin(void)
 	 */
 	apic_ap_setup();
 
-	/*
-	 * Save our processor parameters. Note: this information
-	 * is needed for clock calibration.
-	 */
+	/* Save our processor parameters. */
 	smp_store_cpu_info(cpuid);
 
 	/*
 	 * The topology information must be up to date before
-	 * calibrate_delay() and notify_cpu_starting().
+	 * notify_cpu_starting().
 	 */
 	set_cpu_sibling_map(raw_smp_processor_id());
 
 	ap_init_aperfmperf();
 
-	/*
-	 * Get our bogomips.
-	 * Update loops_per_jiffy in cpu_data. Previous call to
-	 * smp_store_cpu_info() stored a value that is close but not as
-	 * accurate as the value just calculated.
-	 */
-	calibrate_delay();
-	cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
 	pr_debug("Stack at about %p\n", &cpuid);
 
 	wmb();
@@ -212,8 +201,24 @@ static void smp_callin(void)
 	cpumask_set_cpu(cpuid, cpu_callin_mask);
 }
 
+static void ap_calibrate_delay(void)
+{
+	/*
+	 * Calibrate the delay loop and update loops_per_jiffy in cpu_data.
+	 * smp_store_cpu_info() stored a value that is close but not as
+	 * accurate as the value just calculated.
+	 *
+	 * As this is invoked after the TSC synchronization check,
+	 * calibrate_delay_is_known() will skip the calibration routine
+	 * when TSC is synchronized across sockets.
+	 */
+	calibrate_delay();
+	cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
+}
+
 static int cpu0_logical_apicid;
 static int enable_start_cpu0;
+
 /*
  * Activate a secondary processor.
  */
@@ -240,10 +245,15 @@ static void notrace start_secondary(void
 
 	/* otherwise gcc will move up smp_processor_id before the cpu_init */
 	barrier();
+	/* Check TSC synchronization with the control CPU: */
+	check_tsc_sync_target();
+
 	/*
-	 * Check TSC synchronization with the boot CPU:
+	 * Calibrate the delay loop after the TSC synchronization check.
+	 * This allows to skip the calibration when TSC is synchronized
+	 * across sockets.
 	 */
-	check_tsc_sync_target();
+	ap_calibrate_delay();
 
 	speculative_store_bypass_ht_init();
 
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1598,10 +1598,7 @@ void __init tsc_init(void)
 
 #ifdef CONFIG_SMP
 /*
- * If we have a constant TSC and are using the TSC for the delay loop,
- * we can skip clock calibration if another cpu in the same socket has already
- * been calibrated. This assumes that CONSTANT_TSC applies to all
- * cpus in the socket - this should be a safe assumption.
+ * Check whether existing calibration data can be reused.
  */
 unsigned long calibrate_delay_is_known(void)
 {
@@ -1609,6 +1606,21 @@ unsigned long calibrate_delay_is_known(v
 	int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
 	const struct cpumask *mask = topology_core_cpumask(cpu);
 
+	/*
+	 * If TSC has constant frequency and TSC is synchronized across
+	 * sockets then reuse CPU0 calibration.
+	 */
+	if (constant_tsc && !tsc_unstable)
+		return cpu_data(0).loops_per_jiffy;
+
+	/*
+	 * If TSC has constant frequency and TSC is not synchronized across
+	 * sockets and this is not the first CPU in the socket, then reuse
+	 * the calibration value of an already online CPU on that socket.
+	 *
+	 * This assumes that CONSTANT_TSC is consistent for all CPUs in a
+	 * socket.
+	 */
 	if (!constant_tsc || !mask)
 		return 0;
[patch V4 04/37] x86/smpboot: Rename start_cpu0() to soft_restart_cpu()
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

This is used in the SEV play_dead() implementation to re-online CPUs. But
that has nothing to do with CPU0.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/include/asm/cpu.h   |    2 +-
 arch/x86/kernel/callthunks.c |    2 +-
 arch/x86/kernel/head_32.S    |   10 +++++-----
 arch/x86/kernel/head_64.S    |   10 +++++-----
 arch/x86/kernel/sev.c        |    2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)

--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -30,7 +30,7 @@ struct x86_cpu {
 #ifdef CONFIG_HOTPLUG_CPU
 extern int arch_register_cpu(int num);
 extern void arch_unregister_cpu(int);
-extern void start_cpu0(void);
+extern void soft_restart_cpu(void);
 #ifdef CONFIG_DEBUG_HOTPLUG_CPU0
 extern int _debug_hotplug_cpu(int cpu, int action);
 #endif
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -134,7 +134,7 @@ static bool skip_addr(void *dest)
 	if (dest == ret_from_fork)
 		return true;
 #ifdef CONFIG_HOTPLUG_CPU
-	if (dest == start_cpu0)
+	if (dest == soft_restart_cpu)
 		return true;
 #endif
 #ifdef CONFIG_FUNCTION_TRACER
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -140,16 +140,16 @@ SYM_CODE_END(startup_32)
 
 #ifdef CONFIG_HOTPLUG_CPU
 /*
- * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
- * up already except stack. We just set up stack here. Then call
- * start_secondary().
+ * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for
+ * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot
+ * unplug. Everything is set up already except the stack.
  */
-SYM_FUNC_START(start_cpu0)
+SYM_FUNC_START(soft_restart_cpu)
 	movl initial_stack, %ecx
 	movl %ecx, %esp
 	call *(initial_code)
 1:	jmp 1b
-SYM_FUNC_END(start_cpu0)
+SYM_FUNC_END(soft_restart_cpu)
 #endif
 
 /*
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -377,11 +377,11 @@ SYM_CODE_END(secondary_startup_64)
 
 #ifdef CONFIG_HOTPLUG_CPU
 /*
- * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
- * up already except stack. We just set up stack here. Then call
- * start_secondary() via .Ljump_to_C_code.
+ * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for
+ * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot
+ * unplug. Everything is set up already except the stack.
  */
-SYM_CODE_START(start_cpu0)
+SYM_CODE_START(soft_restart_cpu)
 	ANNOTATE_NOENDBR
 	UNWIND_HINT_END_OF_STACK
 
@@ -390,7 +390,7 @@ SYM_CODE_START(start_cpu0)
 	movq	TASK_threadsp(%rcx), %rsp
 
 	jmp	.Ljump_to_C_code
-SYM_CODE_END(start_cpu0)
+SYM_CODE_END(soft_restart_cpu)
 #endif
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1328,7 +1328,7 @@ static void sev_es_play_dead(void)
 	 * If we get here, the VCPU was woken up again. Jump to CPU
 	 * startup code to get it back online.
 	 */
-	start_cpu0();
+	soft_restart_cpu();
 }
 #else  /* CONFIG_HOTPLUG_CPU */
 #define sev_es_play_dead	native_play_dead
Re: [patch V4 04/37] x86/smpboot: Rename start_cpu0() to soft_restart_cpu()
Posted by Philippe Mathieu-Daudé 10 months, 3 weeks ago
On 12/5/23 23:07, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> This is used in the SEV play_dead() implementation to re-online CPUs. But
> that has nothing to do with CPU0.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: Michael Kelley <mikelley@microsoft.com>
> ---
>   arch/x86/include/asm/cpu.h   |    2 +-
>   arch/x86/kernel/callthunks.c |    2 +-
>   arch/x86/kernel/head_32.S    |   10 +++++-----
>   arch/x86/kernel/head_64.S    |   10 +++++-----
>   arch/x86/kernel/sev.c        |    2 +-
>   5 files changed, 13 insertions(+), 13 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>


[patch V4 05/37] x86/topology: Remove CPU0 hotplug option
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

This was introduced together with commit e1c467e69040 ("x86, hotplug: Wake
up CPU0 via NMI instead of INIT, SIPI, SIPI") to eventually support
physical hotplug of CPU0:

 "We'll change this code in the future to wake up hard offlined CPU0 if
  real platform and request are available."

11 years later this has not happened and physical hotplug is not officially
supported. Remove the cruft.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 Documentation/admin-guide/kernel-parameters.txt |   14 ---
 Documentation/core-api/cpu_hotplug.rst          |   13 ---
 arch/x86/Kconfig                                |   43 ----------
 arch/x86/include/asm/cpu.h                      |    3 
 arch/x86/kernel/topology.c                      |   98 ------------------------
 arch/x86/power/cpu.c                            |   37 ---------
 6 files changed, 6 insertions(+), 202 deletions(-)

--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -818,20 +818,6 @@
 			Format:
 			<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
 
-	cpu0_hotplug	[X86] Turn on CPU0 hotplug feature when
-			CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
-			Some features depend on CPU0. Known dependencies are:
-			1. Resume from suspend/hibernate depends on CPU0.
-			Suspend/hibernate will fail if CPU0 is offline and you
-			need to online CPU0 before suspend/hibernate.
-			2. PIC interrupts also depend on CPU0. CPU0 can't be
-			removed if a PIC interrupt is detected.
-			It's said poweroff/reboot may depend on CPU0 on some
-			machines although I haven't seen such issues so far
-			after CPU0 is offline on a few tested machines.
-			If the dependencies are under your control, you can
-			turn on cpu0_hotplug.
-
 	cpuidle.off=1	[CPU_IDLE]
 			disable the cpuidle sub-system
 
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -127,17 +127,8 @@ Once the CPU is shutdown, it will be rem
  $ echo 1 > /sys/devices/system/cpu/cpu4/online
  smpboot: Booting Node 0 Processor 4 APIC 0x1
 
-The CPU is usable again. This should work on all CPUs. CPU0 is often special
-and excluded from CPU hotplug. On X86 the kernel option
-*CONFIG_BOOTPARAM_HOTPLUG_CPU0* has to be enabled in order to be able to
-shutdown CPU0. Alternatively the kernel command option *cpu0_hotplug* can be
-used. Some known dependencies of CPU0:
-
-* Resume from hibernate/suspend. Hibernate/suspend will fail if CPU0 is offline.
-* PIC interrupts. CPU0 can't be removed if a PIC interrupt is detected.
-
-Please let Fenghua Yu <fenghua.yu@intel.com> know if you find any dependencies
-on CPU0.
+The CPU is usable again. This should work on all CPUs, but CPU0 is often special
+and excluded from CPU hotplug.
 
 The CPU hotplug coordination
 ============================
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2305,49 +2305,6 @@ config HOTPLUG_CPU
 	def_bool y
 	depends on SMP
 
-config BOOTPARAM_HOTPLUG_CPU0
-	bool "Set default setting of cpu0_hotpluggable"
-	depends on HOTPLUG_CPU
-	help
-	  Set whether default state of cpu0_hotpluggable is on or off.
-
-	  Say Y here to enable CPU0 hotplug by default. If this switch
-	  is turned on, there is no need to give cpu0_hotplug kernel
-	  parameter and the CPU0 hotplug feature is enabled by default.
-
-	  Please note: there are two known CPU0 dependencies if you want
-	  to enable the CPU0 hotplug feature either by this switch or by
-	  cpu0_hotplug kernel parameter.
-
-	  First, resume from hibernate or suspend always starts from CPU0.
-	  So hibernate and suspend are prevented if CPU0 is offline.
-
-	  Second dependency is PIC interrupts always go to CPU0. CPU0 can not
-	  offline if any interrupt can not migrate out of CPU0. There may
-	  be other CPU0 dependencies.
-
-	  Please make sure the dependencies are under your control before
-	  you enable this feature.
-
-	  Say N if you don't want to enable CPU0 hotplug feature by default.
-	  You still can enable the CPU0 hotplug feature at boot by kernel
-	  parameter cpu0_hotplug.
-
-config DEBUG_HOTPLUG_CPU0
-	def_bool n
-	prompt "Debug CPU0 hotplug"
-	depends on HOTPLUG_CPU
-	help
-	  Enabling this option offlines CPU0 (if CPU0 can be offlined) as
-	  soon as possible and boots up userspace with CPU0 offlined. User
-	  can online CPU0 back after boot time.
-
-	  To debug CPU0 hotplug, you need to enable CPU0 offline/online
-	  feature by either turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during
-	  compilation or giving cpu0_hotplug kernel parameter at boot.
-
-	  If unsure, say N.
-
 config COMPAT_VDSO
 	def_bool n
 	prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -31,9 +31,6 @@ struct x86_cpu {
 extern int arch_register_cpu(int num);
 extern void arch_unregister_cpu(int);
 extern void soft_restart_cpu(void);
-#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
-extern int _debug_hotplug_cpu(int cpu, int action);
-#endif
 #endif
 
 extern void ap_init_aperfmperf(void);
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -38,102 +38,12 @@
 static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
 
 #ifdef CONFIG_HOTPLUG_CPU
-
-#ifdef CONFIG_BOOTPARAM_HOTPLUG_CPU0
-static int cpu0_hotpluggable = 1;
-#else
-static int cpu0_hotpluggable;
-static int __init enable_cpu0_hotplug(char *str)
-{
-	cpu0_hotpluggable = 1;
-	return 1;
-}
-
-__setup("cpu0_hotplug", enable_cpu0_hotplug);
-#endif
-
-#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
-/*
- * This function offlines a CPU as early as possible and allows userspace to
- * boot up without the CPU. The CPU can be onlined back by user after boot.
- *
- * This is only called for debugging CPU offline/online feature.
- */
-int _debug_hotplug_cpu(int cpu, int action)
-{
-	int ret;
-
-	if (!cpu_is_hotpluggable(cpu))
-		return -EINVAL;
-
-	switch (action) {
-	case 0:
-		ret = remove_cpu(cpu);
-		if (!ret)
-			pr_info("DEBUG_HOTPLUG_CPU0: CPU %u is now offline\n", cpu);
-		else
-			pr_debug("Can't offline CPU%d.\n", cpu);
-		break;
-	case 1:
-		ret = add_cpu(cpu);
-		if (ret)
-			pr_debug("Can't online CPU%d.\n", cpu);
-
-		break;
-	default:
-		ret = -EINVAL;
-	}
-
-	return ret;
-}
-
-static int __init debug_hotplug_cpu(void)
+int arch_register_cpu(int cpu)
 {
-	_debug_hotplug_cpu(0, 0);
-	return 0;
-}
-
-late_initcall_sync(debug_hotplug_cpu);
-#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */
-
-int arch_register_cpu(int num)
-{
-	struct cpuinfo_x86 *c = &cpu_data(num);
-
-	/*
-	 * Currently CPU0 is only hotpluggable on Intel platforms. Other
-	 * vendors can add hotplug support later.
-	 * Xen PV guests don't support CPU0 hotplug at all.
-	 */
-	if (c->x86_vendor != X86_VENDOR_INTEL ||
-	    cpu_feature_enabled(X86_FEATURE_XENPV))
-		cpu0_hotpluggable = 0;
-
-	/*
-	 * Two known BSP/CPU0 dependencies: Resume from suspend/hibernate
-	 * depends on BSP. PIC interrupts depend on BSP.
-	 *
-	 * If the BSP dependencies are under control, one can tell kernel to
-	 * enable BSP hotplug. This basically adds a control file and
-	 * one can attempt to offline BSP.
-	 */
-	if (num == 0 && cpu0_hotpluggable) {
-		unsigned int irq;
-		/*
-		 * We won't take down the boot processor on i386 if some
-		 * interrupts only are able to be serviced by the BSP in PIC.
-		 */
-		for_each_active_irq(irq) {
-			if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) {
-				cpu0_hotpluggable = 0;
-				break;
-			}
-		}
-	}
-	if (num || cpu0_hotpluggable)
-		per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
+	struct x86_cpu *xc = per_cpu_ptr(&cpu_devices, cpu);
 
-	return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
+	xc->cpu.hotpluggable = cpu > 0;
+	return register_cpu(&xc->cpu, cpu);
 }
 EXPORT_SYMBOL(arch_register_cpu);
 
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -351,43 +351,6 @@ static int bsp_pm_callback(struct notifi
 	case PM_HIBERNATION_PREPARE:
 		ret = bsp_check();
 		break;
-#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
-	case PM_RESTORE_PREPARE:
-		/*
-		 * When system resumes from hibernation, online CPU0 because
-		 * 1. it's required for resume and
-		 * 2. the CPU was online before hibernation
-		 */
-		if (!cpu_online(0))
-			_debug_hotplug_cpu(0, 1);
-		break;
-	case PM_POST_RESTORE:
-		/*
-		 * When a resume really happens, this code won't be called.
-		 *
-		 * This code is called only when user space hibernation software
-		 * prepares for snapshot device during boot time. So we just
-		 * call _debug_hotplug_cpu() to restore to CPU0's state prior to
-		 * preparing the snapshot device.
-		 *
-		 * This works for normal boot case in our CPU0 hotplug debug
-		 * mode, i.e. CPU0 is offline and user mode hibernation
-		 * software initializes during boot time.
-		 *
-		 * If CPU0 is online and user application accesses snapshot
-		 * device after boot time, this will offline CPU0 and user may
-		 * see different CPU0 state before and after accessing
-		 * the snapshot device. But hopefully this is not a case when
-		 * user debugging CPU0 hotplug. Even if users hit this case,
-		 * they can easily online CPU0 back.
-		 *
-		 * To simplify this debug code, we only consider normal boot
-		 * case. Otherwise we need to remember CPU0's state and restore
-		 * to that state and resolve racy conditions etc.
-		 */
-		_debug_hotplug_cpu(0, 0);
-		break;
-#endif
 	default:
 		break;
 	}
[patch V4 06/37] x86/smpboot: Remove the CPU0 hotplug kludge
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

This was introduced with commit e1c467e69040 ("x86, hotplug: Wake up CPU0
via NMI instead of INIT, SIPI, SIPI") to eventually support physical
hotplug of CPU0:

 "We'll change this code in the future to wake up hard offlined CPU0 if
  real platform and request are available."

11 years later this has not happened and physical hotplug is not officially
supported. Remove the cruft.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/include/asm/apic.h   |    1 
 arch/x86/include/asm/smp.h    |    1 
 arch/x86/kernel/smpboot.c     |  170 +++---------------------------------------
 drivers/acpi/processor_idle.c |    4 
 4 files changed, 14 insertions(+), 162 deletions(-)

--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -377,7 +377,6 @@ extern struct apic *__apicdrivers[], *__
  * APIC functionality to boot other CPUs - only used on SMP:
  */
 #ifdef CONFIG_SMP
-extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
 extern int lapic_can_unplug_cpu(void);
 #endif
 
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -130,7 +130,6 @@ void native_play_dead(void);
 void play_dead_common(void);
 void wbinvd_on_cpu(int cpu);
 int wbinvd_on_all_cpus(void);
-void cond_wakeup_cpu0(void);
 
 void native_smp_send_reschedule(int cpu);
 void native_send_call_func_ipi(const struct cpumask *mask);
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -216,9 +216,6 @@ static void ap_calibrate_delay(void)
 	cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
 }
 
-static int cpu0_logical_apicid;
-static int enable_start_cpu0;
-
 /*
  * Activate a secondary processor.
  */
@@ -241,8 +238,6 @@ static void notrace start_secondary(void
 	x86_cpuinit.early_percpu_clock_init();
 	smp_callin();
 
-	enable_start_cpu0 = 0;
-
 	/* otherwise gcc will move up smp_processor_id before the cpu_init */
 	barrier();
 	/* Check TSC synchronization with the control CPU: */
@@ -410,7 +405,7 @@ void smp_store_cpu_info(int id)
 	c->cpu_index = id;
 	/*
 	 * During boot time, CPU0 has this setup already. Save the info when
-	 * bringing up AP or offlined CPU0.
+	 * bringing up an AP.
 	 */
 	identify_secondary_cpu(c);
 	c->initialized = true;
@@ -807,51 +802,14 @@ static void __init smp_quirk_init_udelay
 }
 
 /*
- * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
- * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
- * won't ... remember to clear down the APIC, etc later.
- */
-int
-wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
-{
-	u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
-	unsigned long send_status, accept_status = 0;
-	int maxlvt;
-
-	/* Target chip */
-	/* Boot on the stack */
-	/* Kick the second */
-	apic_icr_write(APIC_DM_NMI | dm, apicid);
-
-	pr_debug("Waiting for send to finish...\n");
-	send_status = safe_apic_wait_icr_idle();
-
-	/*
-	 * Give the other CPU some time to accept the IPI.
-	 */
-	udelay(200);
-	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
-		maxlvt = lapic_get_maxlvt();
-		if (maxlvt > 3)			/* Due to the Pentium erratum 3AP.  */
-			apic_write(APIC_ESR, 0);
-		accept_status = (apic_read(APIC_ESR) & 0xEF);
-	}
-	pr_debug("NMI sent\n");
-
-	if (send_status)
-		pr_err("APIC never delivered???\n");
-	if (accept_status)
-		pr_err("APIC delivery error (%lx)\n", accept_status);
-
-	return (send_status | accept_status);
-}
-
-static int
-wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
+ * Wake up AP by INIT, INIT, STARTUP sequence.
+ */
+static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 {
 	unsigned long send_status = 0, accept_status = 0;
 	int maxlvt, num_starts, j;
 
+	preempt_disable();
 	maxlvt = lapic_get_maxlvt();
 
 	/*
@@ -957,6 +915,7 @@ wakeup_secondary_cpu_via_init(int phys_a
 	if (accept_status)
 		pr_err("APIC delivery error (%lx)\n", accept_status);
 
+	preempt_enable();
 	return (send_status | accept_status);
 }
 
@@ -997,67 +956,6 @@ static void announce_cpu(int cpu, int ap
 			node, cpu, apicid);
 }
 
-static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
-{
-	int cpu;
-
-	cpu = smp_processor_id();
-	if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
-		return NMI_HANDLED;
-
-	return NMI_DONE;
-}
-
-/*
- * Wake up AP by INIT, INIT, STARTUP sequence.
- *
- * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS
- * boot-strap code which is not a desired behavior for waking up BSP. To
- * void the boot-strap code, wake up CPU0 by NMI instead.
- *
- * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined
- * (i.e. physically hot removed and then hot added), NMI won't wake it up.
- * We'll change this code in the future to wake up hard offlined CPU0 if
- * real platform and request are available.
- */
-static int
-wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
-	       int *cpu0_nmi_registered)
-{
-	int id;
-	int boot_error;
-
-	preempt_disable();
-
-	/*
-	 * Wake up AP by INIT, INIT, STARTUP sequence.
-	 */
-	if (cpu) {
-		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
-		goto out;
-	}
-
-	/*
-	 * Wake up BSP by nmi.
-	 *
-	 * Register a NMI handler to help wake up CPU0.
-	 */
-	boot_error = register_nmi_handler(NMI_LOCAL,
-					  wakeup_cpu0_nmi, 0, "wake_cpu0");
-
-	if (!boot_error) {
-		enable_start_cpu0 = 1;
-		*cpu0_nmi_registered = 1;
-		id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid;
-		boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
-	}
-
-out:
-	preempt_enable();
-
-	return boot_error;
-}
-
 int common_cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	int ret;
@@ -1086,8 +984,7 @@ int common_cpu_up(unsigned int cpu, stru
  * Returns zero if CPU booted OK, else error code from
  * ->wakeup_secondary_cpu.
  */
-static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
-		       int *cpu0_nmi_registered)
+static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 {
 	/* start_ip had better be page-aligned! */
 	unsigned long start_ip = real_mode_header->trampoline_start;
@@ -1120,7 +1017,6 @@ static int do_boot_cpu(int apicid, int c
 	 * This grunge runs the startup process for
 	 * the targeted processor.
 	 */
-
 	if (x86_platform.legacy.warm_reset) {
 
 		pr_debug("Setting warm reset code and vector.\n");
@@ -1149,15 +1045,14 @@ static int do_boot_cpu(int apicid, int c
 	 * - Use a method from the APIC driver if one defined, with wakeup
 	 *   straight to 64-bit mode preferred over wakeup to RM.
 	 * Otherwise,
-	 * - Use an INIT boot APIC message for APs or NMI for BSP.
+	 * - Use an INIT boot APIC message
 	 */
 	if (apic->wakeup_secondary_cpu_64)
 		boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
 	else if (apic->wakeup_secondary_cpu)
 		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
 	else
-		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
-						     cpu0_nmi_registered);
+		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
 
 	if (!boot_error) {
 		/*
@@ -1206,9 +1101,8 @@ static int do_boot_cpu(int apicid, int c
 int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
-	int cpu0_nmi_registered = 0;
 	unsigned long flags;
-	int err, ret = 0;
+	int err;
 
 	lockdep_assert_irqs_enabled();
 
@@ -1247,11 +1141,10 @@ int native_cpu_up(unsigned int cpu, stru
 	if (err)
 		return err;
 
-	err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
+	err = do_boot_cpu(apicid, cpu, tidle);
 	if (err) {
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
-		ret = -EIO;
-		goto unreg_nmi;
+		return err;
 	}
 
 	/*
@@ -1267,15 +1160,7 @@ int native_cpu_up(unsigned int cpu, stru
 		touch_nmi_watchdog();
 	}
 
-unreg_nmi:
-	/*
-	 * Clean up the nmi handler. Do this after the callin and callout sync
-	 * to avoid impact of possible long unregister time.
-	 */
-	if (cpu0_nmi_registered)
-		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
-
-	return ret;
+	return 0;
 }
 
 /**
@@ -1373,14 +1258,6 @@ static void __init smp_cpu_index_default
 	}
 }
 
-static void __init smp_get_logical_apicid(void)
-{
-	if (x2apic_mode)
-		cpu0_logical_apicid = apic_read(APIC_LDR);
-	else
-		cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-}
-
 void __init smp_prepare_cpus_common(void)
 {
 	unsigned int i;
@@ -1443,8 +1320,6 @@ void __init native_smp_prepare_cpus(unsi
 	/* Setup local timer */
 	x86_init.timers.setup_percpu_clockev();
 
-	smp_get_logical_apicid();
-
 	pr_info("CPU0: ");
 	print_cpu_info(&cpu_data(0));
 
@@ -1752,18 +1627,6 @@ void play_dead_common(void)
 	local_irq_disable();
 }
 
-/**
- * cond_wakeup_cpu0 - Wake up CPU0 if needed.
- *
- * If NMI wants to wake up CPU0, start CPU0.
- */
-void cond_wakeup_cpu0(void)
-{
-	if (smp_processor_id() == 0 && enable_start_cpu0)
-		start_cpu0();
-}
-EXPORT_SYMBOL_GPL(cond_wakeup_cpu0);
-
 /*
  * We need to flush the caches before going to sleep, lest we have
  * dirty data in our caches when we come back up.
@@ -1831,8 +1694,6 @@ static inline void mwait_play_dead(void)
 		__monitor(mwait_ptr, 0, 0);
 		mb();
 		__mwait(eax, 0);
-
-		cond_wakeup_cpu0();
 	}
 }
 
@@ -1841,11 +1702,8 @@ void __noreturn hlt_play_dead(void)
 	if (__this_cpu_read(cpu_info.x86) >= 4)
 		wbinvd();
 
-	while (1) {
+	while (1)
 		native_halt();
-
-		cond_wakeup_cpu0();
-	}
 }
 
 void native_play_dead(void)
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -597,10 +597,6 @@ static int acpi_idle_play_dead(struct cp
 			io_idle(cx->address);
 		} else
 			return -ENODEV;
-
-#if defined(CONFIG_X86) && defined(CONFIG_HOTPLUG_CPU)
-		cond_wakeup_cpu0();
-#endif
 	}
 
 	/* Never reached */
[patch V4 07/37] x86/smpboot: Restrict soft_restart_cpu() to SEV
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Now that the CPU0 hotplug cruft is gone, the only user is AMD SEV.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/kernel/callthunks.c |    2 +-
 arch/x86/kernel/head_32.S    |   14 --------------
 arch/x86/kernel/head_64.S    |    2 +-
 3 files changed, 2 insertions(+), 16 deletions(-)

--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -133,7 +133,7 @@ static bool skip_addr(void *dest)
 	/* Accounts directly */
 	if (dest == ret_from_fork)
 		return true;
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT)
 	if (dest == soft_restart_cpu)
 		return true;
 #endif
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -138,20 +138,6 @@ SYM_CODE_START(startup_32)
 	jmp .Ldefault_entry
 SYM_CODE_END(startup_32)
 
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for
- * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot
- * unplug. Everything is set up already except the stack.
- */
-SYM_FUNC_START(soft_restart_cpu)
-	movl initial_stack, %ecx
-	movl %ecx, %esp
-	call *(initial_code)
-1:	jmp 1b
-SYM_FUNC_END(soft_restart_cpu)
-#endif
-
 /*
  * Non-boot CPU entry point; entered from trampoline.S
  * We can't lgdt here, because lgdt itself uses a data segment, but
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -375,7 +375,7 @@ SYM_CODE_END(secondary_startup_64)
 #include "verify_cpu.S"
 #include "sev_verify_cbit.S"
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT)
 /*
  * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for
  * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot
Re: [patch V4 07/37] x86/smpboot: Restrict soft_restart_cpu() to SEV
Posted by Philippe Mathieu-Daudé 10 months, 3 weeks ago
On 12/5/23 23:07, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> Now that the CPU0 hotplug cruft is gone, the only user is AMD SEV.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: Michael Kelley <mikelley@microsoft.com>
> ---
>   arch/x86/kernel/callthunks.c |    2 +-
>   arch/x86/kernel/head_32.S    |   14 --------------
>   arch/x86/kernel/head_64.S    |    2 +-
>   3 files changed, 2 insertions(+), 16 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>


[patch V4 08/37] x86/smpboot: Remove unnecessary barrier()
Posted by Thomas Gleixner 11 months, 4 weeks ago
Peter stumbled over the barrier() after the invocation of smp_callin() in
start_secondary():

  "...this barrier() and it's comment seem weird vs smp_callin(). That
   function ends with an atomic bitop (it has to, at the very least it must
   not be weaker than store-release) but also has an explicit wmb() to order
   setup vs CPU_STARTING.

   There is no way the smp_processor_id() referred to in this comment can land
   before cpu_init() even without the barrier()."

The barrier() along with the comment was added in 2003 with commit
d8f19f2cac70 ("[PATCH] x86-64 merge") in the history tree. One of those
well documented combo patches of that time which changes world and some
more. The context back then was:

	/*
	 * Dont put anything before smp_callin(), SMP
	 * booting is too fragile that we want to limit the
	 * things done here to the most necessary things.
	 */
	cpu_init();
	smp_callin();

+	/* otherwise gcc will move up smp_processor_id before the cpu_init */
+ 	barrier();

	Dprintk("cpu %d: waiting for commence\n", smp_processor_id()); 

Even back in 2003 the compiler was not allowed to reorder that
smp_processor_id() invocation before the cpu_init() function call.
Especially not as smp_processor_id() resolved to:

  asm volatile("movl %%gs:%c1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory");

There is no trace of this change in any mailing list archive including the
back then official x86_64 list discuss@x86-64.org, which would explain the
problem this change solved.

The debug prints are gone by now and the only smp_processor_id()
invocation today is farther down in start_secondary() after locking
vector_lock which itself prevents reordering.

Even if the compiler would be allowed to reorder this, the code would still
be correct as GSBASE is set up early in the assembly code and is valid when
the CPU reaches start_secondary(), while the code at the time when this
barrier was added did the GSBASE setup in cpu_init().

As the barrier has zero value, remove it.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20230509100421.GU83892@hirez.programming.kicks-ass.net
---
V4: New patch
---
 arch/x86/kernel/smpboot.c |    2 --
 1 file changed, 2 deletions(-)

--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -238,8 +238,6 @@ static void notrace start_secondary(void
 	x86_cpuinit.early_percpu_clock_init();
 	smp_callin();
 
-	/* otherwise gcc will move up smp_processor_id before the cpu_init */
-	barrier();
 	/* Check TSC synchronization with the control CPU: */
 	check_tsc_sync_target();
[patch V4 09/37] x86/smpboot: Split up native_cpu_up() into separate phases and document them
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: David Woodhouse <dwmw@amazon.co.uk>

There are four logical parts to what native_cpu_up() does on the BSP (or
on the controlling CPU for a later hotplug):

 1) Wake the AP by sending the INIT/SIPI/SIPI sequence.

 2) Wait for the AP to make it as far as wait_for_master_cpu() which
    sets that CPU's bit in cpu_initialized_mask, then sets the bit in
    cpu_callout_mask to let the AP proceed through cpu_init().

 3) Wait for the AP to finish cpu_init() and get as far as the
    smp_callin() call, which sets that CPU's bit in cpu_callin_mask.

 4) Perform the TSC synchronization and wait for the AP to actually
    mark itself online in cpu_online_mask.

In preparation to allow these phases to operate in parallel on multiple
APs, split them out into separate functions and document the interactions
a little more clearly in both the BP and AP code paths.

No functional change intended.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/kernel/smpboot.c |  184 +++++++++++++++++++++++++++++-----------------
 1 file changed, 119 insertions(+), 65 deletions(-)

--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -193,6 +193,10 @@ static void smp_callin(void)
 
 	wmb();
 
+	/*
+	 * This runs the AP through all the cpuhp states to its target
+	 * state CPUHP_ONLINE.
+	 */
 	notify_cpu_starting(cpuid);
 
 	/*
@@ -233,12 +237,28 @@ static void notrace start_secondary(void
 	load_cr3(swapper_pg_dir);
 	__flush_tlb_all();
 #endif
+	/*
+	 * Sync point with wait_cpu_initialized(). Before proceeding through
+	 * cpu_init(), the AP will call wait_for_master_cpu() which sets its
+	 * own bit in cpu_initialized_mask and then waits for the BSP to set
+	 * its bit in cpu_callout_mask to release it.
+	 */
 	cpu_init_secondary();
 	rcu_cpu_starting(raw_smp_processor_id());
 	x86_cpuinit.early_percpu_clock_init();
+
+	/*
+	 * Sync point with wait_cpu_callin(). The AP doesn't wait here
+	 * but just sets the bit to let the controlling CPU (BSP) know that
+	 * it's got this far.
+	 */
 	smp_callin();
 
-	/* Check TSC synchronization with the control CPU: */
+	/*
+	 * Check TSC synchronization with the control CPU, which will do
+	 * its part of this from wait_cpu_online(), making it an implicit
+	 * synchronization point.
+	 */
 	check_tsc_sync_target();
 
 	/*
@@ -257,6 +277,7 @@ static void notrace start_secondary(void
 	 * half valid vector space.
 	 */
 	lock_vector_lock();
+	/* Sync point with do_wait_cpu_online() */
 	set_cpu_online(smp_processor_id(), true);
 	lapic_online();
 	unlock_vector_lock();
@@ -979,17 +1000,13 @@ int common_cpu_up(unsigned int cpu, stru
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from
+ * Returns zero if startup was successfully sent, else error code from
  * ->wakeup_secondary_cpu.
  */
 static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 {
-	/* start_ip had better be page-aligned! */
 	unsigned long start_ip = real_mode_header->trampoline_start;
 
-	unsigned long boot_error = 0;
-	unsigned long timeout;
-
 #ifdef CONFIG_X86_64
 	/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
 	if (apic->wakeup_secondary_cpu_64)
@@ -1046,60 +1063,89 @@ static int do_boot_cpu(int apicid, int c
 	 * - Use an INIT boot APIC message
 	 */
 	if (apic->wakeup_secondary_cpu_64)
-		boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
+		return apic->wakeup_secondary_cpu_64(apicid, start_ip);
 	else if (apic->wakeup_secondary_cpu)
-		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
-	else
-		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
+		return apic->wakeup_secondary_cpu(apicid, start_ip);
 
-	if (!boot_error) {
-		/*
-		 * Wait 10s total for first sign of life from AP
-		 */
-		boot_error = -1;
-		timeout = jiffies + 10*HZ;
-		while (time_before(jiffies, timeout)) {
-			if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
-				/*
-				 * Tell AP to proceed with initialization
-				 */
-				cpumask_set_cpu(cpu, cpu_callout_mask);
-				boot_error = 0;
-				break;
-			}
-			schedule();
-		}
-	}
+	return wakeup_secondary_cpu_via_init(apicid, start_ip);
+}
 
-	if (!boot_error) {
-		/*
-		 * Wait till AP completes initial initialization
-		 */
-		while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
-			/*
-			 * Allow other tasks to run while we wait for the
-			 * AP to come online. This also gives a chance
-			 * for the MTRR work(triggered by the AP coming online)
-			 * to be completed in the stop machine context.
-			 */
-			schedule();
-		}
-	}
+static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
+{
+	unsigned long timeout;
 
-	if (x86_platform.legacy.warm_reset) {
-		/*
-		 * Cleanup possible dangling ends...
-		 */
-		smpboot_restore_warm_reset_vector();
+	/*
+	 * Wait up to 10s for the CPU to report in.
+	 */
+	timeout = jiffies + 10*HZ;
+	while (time_before(jiffies, timeout)) {
+		if (cpumask_test_cpu(cpu, mask))
+			return 0;
+
+		schedule();
 	}
+	return -1;
+}
 
-	return boot_error;
+/*
+ * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
+ * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
+ * to proceed.  The AP will then proceed past setting its 'callin' bit
+ * and end up waiting in check_tsc_sync_target() until we reach
+ * do_wait_cpu_online() to tend to it.
+ */
+static int wait_cpu_initialized(unsigned int cpu)
+{
+	/*
+	 * Wait for first sign of life from AP.
+	 */
+	if (wait_cpu_cpumask(cpu, cpu_initialized_mask))
+		return -1;
+
+	cpumask_set_cpu(cpu, cpu_callout_mask);
+	return 0;
 }
 
-int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+/*
+ * Bringup step three: Wait for the target AP to reach smp_callin().
+ * The AP is not waiting for us here so we don't need to parallelise
+ * this step. Not entirely clear why we care about this, since we just
+ * proceed directly to TSC synchronization which is the next sync
+ * point with the AP anyway.
+ */
+static void wait_cpu_callin(unsigned int cpu)
+{
+	while (!cpumask_test_cpu(cpu, cpu_callin_mask))
+		schedule();
+}
+
+/*
+ * Bringup step four: Synchronize the TSC and wait for the target AP
+ * to reach set_cpu_online() in start_secondary().
+ */
+static void wait_cpu_online(unsigned int cpu)
 {
-	int apicid = apic->cpu_present_to_apicid(cpu);
 	unsigned long flags;
+
+	/*
+	 * Check TSC synchronization with the AP (keep irqs disabled
+	 * while doing so):
+	 */
+	local_irq_save(flags);
+	check_tsc_sync_source(cpu);
+	local_irq_restore(flags);
+
+	/*
+	 * Wait for the AP to mark itself online, so the core caller
+	 * can drop sparse_irq_lock.
+	 */
+	while (!cpu_online(cpu))
+		schedule();
+}
+
+static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
+{
+	int apicid = apic->cpu_present_to_apicid(cpu);
 	int err;
 
 	lockdep_assert_irqs_enabled();
@@ -1140,25 +1186,33 @@ int native_cpu_up(unsigned int cpu, stru
 		return err;
 
 	err = do_boot_cpu(apicid, cpu, tidle);
-	if (err) {
+	if (err)
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
-		return err;
-	}
 
-	/*
-	 * Check TSC synchronization with the AP (keep irqs disabled
-	 * while doing so):
-	 */
-	local_irq_save(flags);
-	check_tsc_sync_source(cpu);
-	local_irq_restore(flags);
+	return err;
+}
 
-	while (!cpu_online(cpu)) {
-		cpu_relax();
-		touch_nmi_watchdog();
-	}
+int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+	int ret;
 
-	return 0;
+	ret = native_kick_ap(cpu, tidle);
+	if (ret)
+		goto out;
+
+	ret = wait_cpu_initialized(cpu);
+	if (ret)
+		goto out;
+
+	wait_cpu_callin(cpu);
+	wait_cpu_online(cpu);
+
+out:
+	/* Cleanup possible dangling ends... */
+	if (x86_platform.legacy.warm_reset)
+		smpboot_restore_warm_reset_vector();
+
+	return ret;
 }
 
 /**
[patch V4 10/37] x86/smpboot: Get rid of cpu_init_secondary()
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

The synchronization of the AP with the control CPU is an SMP boot problem
and has nothing to do with cpu_init().

Open code cpu_init_secondary() in start_secondary() and move
wait_for_master_cpu() into the SMP boot code.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/include/asm/processor.h |    1 -
 arch/x86/kernel/cpu/common.c     |   27 ---------------------------
 arch/x86/kernel/smpboot.c        |   24 +++++++++++++++++++-----
 3 files changed, 19 insertions(+), 33 deletions(-)

--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -551,7 +551,6 @@ extern void switch_gdt_and_percpu_base(i
 extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void cpu_init(void);
-extern void cpu_init_secondary(void);
 extern void cpu_init_exception_handling(void);
 extern void cr4_init(void);
 
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2123,19 +2123,6 @@ static void dbg_restore_debug_regs(void)
 #define dbg_restore_debug_regs()
 #endif /* ! CONFIG_KGDB */
 
-static void wait_for_master_cpu(int cpu)
-{
-#ifdef CONFIG_SMP
-	/*
-	 * wait for ACK from master CPU before continuing
-	 * with AP initialization
-	 */
-	WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
-	while (!cpumask_test_cpu(cpu, cpu_callout_mask))
-		cpu_relax();
-#endif
-}
-
 static inline void setup_getcpu(int cpu)
 {
 	unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
@@ -2239,8 +2226,6 @@ void cpu_init(void)
 	struct task_struct *cur = current;
 	int cpu = raw_smp_processor_id();
 
-	wait_for_master_cpu(cpu);
-
 	ucode_cpu_init(cpu);
 
 #ifdef CONFIG_NUMA
@@ -2293,18 +2278,6 @@ void cpu_init(void)
 	load_fixmap_gdt(cpu);
 }
 
-#ifdef CONFIG_SMP
-void cpu_init_secondary(void)
-{
-	/*
-	 * Relies on the BP having set-up the IDT tables, which are loaded
-	 * on this CPU in cpu_init_exception_handling().
-	 */
-	cpu_init_exception_handling();
-	cpu_init();
-}
-#endif
-
 #ifdef CONFIG_MICROCODE_LATE_LOADING
 /**
  * store_cpu_caps() - Store a snapshot of CPU capabilities
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -220,6 +220,17 @@ static void ap_calibrate_delay(void)
 	cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
 }
 
+static void wait_for_master_cpu(int cpu)
+{
+	/*
+	 * Wait for release by control CPU before continuing with AP
+	 * initialization.
+	 */
+	WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
+	while (!cpumask_test_cpu(cpu, cpu_callout_mask))
+		cpu_relax();
+}
+
 /*
  * Activate a secondary processor.
  */
@@ -237,13 +248,16 @@ static void notrace start_secondary(void
 	load_cr3(swapper_pg_dir);
 	__flush_tlb_all();
 #endif
+	cpu_init_exception_handling();
+
 	/*
-	 * Sync point with wait_cpu_initialized(). Before proceeding through
-	 * cpu_init(), the AP will call wait_for_master_cpu() which sets its
-	 * own bit in cpu_initialized_mask and then waits for the BSP to set
-	 * its bit in cpu_callout_mask to release it.
+	 * Sync point with wait_cpu_initialized(). Sets AP in
+	 * cpu_initialized_mask and then waits for the control CPU
+	 * to release it.
 	 */
-	cpu_init_secondary();
+	wait_for_master_cpu(raw_smp_processor_id());
+
+	cpu_init();
 	rcu_cpu_starting(raw_smp_processor_id());
 	x86_cpuinit.early_percpu_clock_init();
Re: [patch V4 10/37] x86/smpboot: Get rid of cpu_init_secondary()
Posted by Philippe Mathieu-Daudé 10 months, 3 weeks ago
On 12/5/23 23:07, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> The synchronization of the AP with the control CPU is a SMP boot problem
> and has nothing to do with cpu_init().
> 
> Open code cpu_init_secondary() in start_secondary() and move
> wait_for_master_cpu() into the SMP boot code.
> 
> No functional change.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: Michael Kelley <mikelley@microsoft.com>
> ---
>   arch/x86/include/asm/processor.h |    1 -
>   arch/x86/kernel/cpu/common.c     |   27 ---------------------------
>   arch/x86/kernel/smpboot.c        |   24 +++++++++++++++++++-----
>   3 files changed, 19 insertions(+), 33 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>


[patch V4 11/37] x86/cpu/cacheinfo: Remove cpu_callout_mask dependency
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

cpu_callout_mask is used for the stop machine based MTRR/PAT init.

In preparation of moving the BP/AP synchronization to the core hotplug
code, use a private CPU mask for cacheinfo and manage it in the
starting/dying hotplug state.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/kernel/cpu/cacheinfo.c |   21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -39,6 +39,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t
 /* Shared L2 cache maps */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
 
+static cpumask_var_t cpu_cacheinfo_mask;
+
 /* Kernel controls MTRR and/or PAT MSRs. */
 unsigned int memory_caching_control __ro_after_init;
 
@@ -1172,8 +1174,10 @@ void cache_bp_restore(void)
 		cache_cpu_init();
 }
 
-static int cache_ap_init(unsigned int cpu)
+static int cache_ap_online(unsigned int cpu)
 {
+	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);
+
 	if (!memory_caching_control || get_cache_aps_delayed_init())
 		return 0;
 
@@ -1191,11 +1195,17 @@ static int cache_ap_init(unsigned int cp
 	 *      lock to prevent MTRR entry changes
 	 */
 	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
-				       cpu_callout_mask);
+				       cpu_cacheinfo_mask);
 
 	return 0;
 }
 
+static int cache_ap_offline(unsigned int cpu)
+{
+	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
+	return 0;
+}
+
 /*
  * Delayed cache initialization for all AP's
  */
@@ -1210,9 +1220,12 @@ void cache_aps_init(void)
 
 static int __init cache_ap_register(void)
 {
+	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
+	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);
+
 	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
 				  "x86/cachectrl:starting",
-				  cache_ap_init, NULL);
+				  cache_ap_online, cache_ap_offline);
 	return 0;
 }
-core_initcall(cache_ap_register);
+early_initcall(cache_ap_register);
[patch V4 12/37] x86/smpboot: Move synchronization masks to SMP boot code
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

The usage is in smpboot.c and not in the CPU initialization code.

The XEN_PV usage of cpu_callout_mask is obsolete as cpu_init() no longer
waits and cacheinfo has its own CPU mask now, so cpu_callout_mask can be
made static too.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/include/asm/cpumask.h |    5 -----
 arch/x86/kernel/cpu/common.c   |   17 -----------------
 arch/x86/kernel/smpboot.c      |   16 ++++++++++++++++
 arch/x86/xen/smp_pv.c          |    3 ---
 4 files changed, 16 insertions(+), 25 deletions(-)
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -4,11 +4,6 @@
 #ifndef __ASSEMBLY__
 #include <linux/cpumask.h>
 
-extern cpumask_var_t cpu_callin_mask;
-extern cpumask_var_t cpu_callout_mask;
-extern cpumask_var_t cpu_initialized_mask;
-extern cpumask_var_t cpu_sibling_setup_mask;
-
 extern void setup_cpu_local_masks(void);
 
 /*
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -67,14 +67,6 @@
 
 u32 elf_hwcap2 __read_mostly;
 
-/* all of these masks are initialized in setup_cpu_local_masks() */
-cpumask_var_t cpu_initialized_mask;
-cpumask_var_t cpu_callout_mask;
-cpumask_var_t cpu_callin_mask;
-
-/* representing cpus for which sibling maps can be computed */
-cpumask_var_t cpu_sibling_setup_mask;
-
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
 EXPORT_SYMBOL(smp_num_siblings);
@@ -169,15 +161,6 @@ static void ppin_init(struct cpuinfo_x86
 	clear_cpu_cap(c, info->feature);
 }
 
-/* correctly size the local cpu masks */
-void __init setup_cpu_local_masks(void)
-{
-	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
-	alloc_bootmem_cpumask_var(&cpu_callin_mask);
-	alloc_bootmem_cpumask_var(&cpu_callout_mask);
-	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
-}
-
 static void default_init(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_64
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,6 +101,13 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
+/* All of these masks are initialized in setup_cpu_local_masks() */
+static cpumask_var_t cpu_initialized_mask;
+static cpumask_var_t cpu_callout_mask;
+static cpumask_var_t cpu_callin_mask;
+/* Representing CPUs for which sibling maps can be computed */
+static cpumask_var_t cpu_sibling_setup_mask;
+
 /* Logical package management. We might want to allocate that dynamically */
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
@@ -1548,6 +1555,15 @@ early_param("possible_cpus", _setup_poss
 		set_cpu_possible(i, true);
 }
 
+/* correctly size the local cpu masks */
+void __init setup_cpu_local_masks(void)
+{
+	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+	alloc_bootmem_cpumask_var(&cpu_callin_mask);
+	alloc_bootmem_cpumask_var(&cpu_callout_mask);
+	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 /* Recompute SMT state for all CPUs on offline */
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -254,15 +254,12 @@ cpu_initialize_context(unsigned int cpu,
 	struct desc_struct *gdt;
 	unsigned long gdt_mfn;
 
-	/* used to tell cpu_init() that it can proceed with initialization */
-	cpumask_set_cpu(cpu, cpu_callout_mask);
 	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
 		return 0;
 
 	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (ctxt == NULL) {
 		cpumask_clear_cpu(cpu, xen_cpu_initialized_map);
-		cpumask_clear_cpu(cpu, cpu_callout_mask);
 		return -ENOMEM;
 	}
[patch V4 13/37] x86/smpboot: Make TSC synchronization function call based
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Spin-waiting on the control CPU until the AP reaches the TSC
synchronization is just a waste especially in the case that there is no
synchronization required.

As the synchronization has to run with interrupts disabled the control CPU
part can just be done from a SMP function call. The upcoming AP issues that
call async only in the case that synchronization is required.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/include/asm/tsc.h |    2 --
 arch/x86/kernel/smpboot.c  |   20 +++-----------------
 arch/x86/kernel/tsc_sync.c |   36 +++++++++++-------------------------
 3 files changed, 14 insertions(+), 44 deletions(-)

--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -55,12 +55,10 @@ extern bool tsc_async_resets;
 #ifdef CONFIG_X86_TSC
 extern bool tsc_store_and_check_tsc_adjust(bool bootcpu);
 extern void tsc_verify_tsc_adjust(bool resume);
-extern void check_tsc_sync_source(int cpu);
 extern void check_tsc_sync_target(void);
 #else
 static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; }
 static inline void tsc_verify_tsc_adjust(bool resume) { }
-static inline void check_tsc_sync_source(int cpu) { }
 static inline void check_tsc_sync_target(void) { }
 #endif
 
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -275,11 +275,7 @@ static void notrace start_secondary(void
 	 */
 	smp_callin();
 
-	/*
-	 * Check TSC synchronization with the control CPU, which will do
-	 * its part of this from wait_cpu_online(), making it an implicit
-	 * synchronization point.
-	 */
+	/* Check TSC synchronization with the control CPU. */
 	check_tsc_sync_target();
 
 	/*
@@ -1141,21 +1137,11 @@ static void wait_cpu_callin(unsigned int
 }
 
 /*
- * Bringup step four: Synchronize the TSC and wait for the target AP
- * to reach set_cpu_online() in start_secondary().
+ * Bringup step four: Wait for the target AP to reach set_cpu_online() in
+ * start_secondary().
  */
 static void wait_cpu_online(unsigned int cpu)
 {
-	unsigned long flags;
-
-	/*
-	 * Check TSC synchronization with the AP (keep irqs disabled
-	 * while doing so):
-	 */
-	local_irq_save(flags);
-	check_tsc_sync_source(cpu);
-	local_irq_restore(flags);
-
 	/*
 	 * Wait for the AP to mark itself online, so the core caller
 	 * can drop sparse_irq_lock.
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -245,7 +245,6 @@ bool tsc_store_and_check_tsc_adjust(bool
  */
 static atomic_t start_count;
 static atomic_t stop_count;
-static atomic_t skip_test;
 static atomic_t test_runs;
 
 /*
@@ -344,21 +343,14 @@ static inline unsigned int loop_timeout(
 }
 
 /*
- * Source CPU calls into this - it waits for the freshly booted
- * target CPU to arrive and then starts the measurement:
+ * The freshly booted CPU initiates this via an async SMP function call.
  */
-void check_tsc_sync_source(int cpu)
+static void check_tsc_sync_source(void *__cpu)
 {
+	unsigned int cpu = (unsigned long)__cpu;
 	int cpus = 2;
 
 	/*
-	 * No need to check if we already know that the TSC is not
-	 * synchronized or if we have no TSC.
-	 */
-	if (unsynchronized_tsc())
-		return;
-
-	/*
 	 * Set the maximum number of test runs to
 	 *  1 if the CPU does not provide the TSC_ADJUST MSR
 	 *  3 if the MSR is available, so the target can try to adjust
@@ -368,16 +360,9 @@ void check_tsc_sync_source(int cpu)
 	else
 		atomic_set(&test_runs, 3);
 retry:
-	/*
-	 * Wait for the target to start or to skip the test:
-	 */
-	while (atomic_read(&start_count) != cpus - 1) {
-		if (atomic_read(&skip_test) > 0) {
-			atomic_set(&skip_test, 0);
-			return;
-		}
+	/* Wait for the target to start. */
+	while (atomic_read(&start_count) != cpus - 1)
 		cpu_relax();
-	}
 
 	/*
 	 * Trigger the target to continue into the measurement too:
@@ -397,14 +382,14 @@ void check_tsc_sync_source(int cpu)
 	if (!nr_warps) {
 		atomic_set(&test_runs, 0);
 
-		pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
+		pr_debug("TSC synchronization [CPU#%d -> CPU#%u]: passed\n",
 			smp_processor_id(), cpu);
 
 	} else if (atomic_dec_and_test(&test_runs) || random_warps) {
 		/* Force it to 0 if random warps brought us here */
 		atomic_set(&test_runs, 0);
 
-		pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n",
+		pr_warn("TSC synchronization [CPU#%d -> CPU#%u]:\n",
 			smp_processor_id(), cpu);
 		pr_warn("Measured %Ld cycles TSC warp between CPUs, "
 			"turning off TSC clock.\n", max_warp);
@@ -457,11 +442,12 @@ void check_tsc_sync_target(void)
 	 * SoCs the TSC is frequency synchronized, but still the TSC ADJUST
 	 * register might have been wreckaged by the BIOS..
 	 */
-	if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) {
-		atomic_inc(&skip_test);
+	if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable)
 		return;
-	}
 
+	/* Kick the control CPU into the TSC synchronization function */
+	smp_call_function_single(cpumask_first(cpu_online_mask), check_tsc_sync_source,
+				 (unsigned long *)(unsigned long)cpu, 0);
 retry:
 	/*
 	 * Register this CPU's participation and wait for the
[patch V4 14/37] x86/smpboot: Remove cpu_callin_mask
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Now that TSC synchronization is SMP function call based there is no reason
to wait for the AP to be set in cpu_callin_mask. The control CPU waits for
the AP to set itself in the online mask anyway.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
V4: Rename smp_callin() to ap_starting() - Peter Z.
---
 arch/x86/kernel/smpboot.c |   74 +++++++++-------------------------------------
 1 file changed, 15 insertions(+), 59 deletions(-)

--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -104,7 +104,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 /* All of these masks are initialized in setup_cpu_local_masks() */
 static cpumask_var_t cpu_initialized_mask;
 static cpumask_var_t cpu_callout_mask;
-static cpumask_var_t cpu_callin_mask;
 /* Representing CPUs for which sibling maps can be computed */
 static cpumask_var_t cpu_sibling_setup_mask;
 
@@ -161,38 +160,30 @@ static inline void smpboot_restore_warm_
 
 }
 
-/*
- * Report back to the Boot Processor during boot time or to the caller processor
- * during CPU online.
- */
-static void smp_callin(void)
+/* Run the next set of setup steps for the upcoming CPU */
+static void ap_starting(void)
 {
-	int cpuid;
-
-	/*
-	 * If waken up by an INIT in an 82489DX configuration
-	 * cpu_callout_mask guarantees we don't get here before
-	 * an INIT_deassert IPI reaches our local APIC, so it is
-	 * now safe to touch our local APIC.
-	 */
-	cpuid = smp_processor_id();
+	int cpuid = smp_processor_id();
 
 	/*
-	 * the boot CPU has finished the init stage and is spinning
-	 * on callin_map until we finish. We are free to set up this
-	 * CPU, first the APIC. (this is probably redundant on most
-	 * boards)
+	 * If woken up by an INIT in an 82489DX configuration
+	 * cpu_callout_mask guarantees the CPU does not reach this point
+	 * before an INIT_deassert IPI reaches the local APIC, so it is now
+	 * safe to touch the local APIC.
+	 *
+	 * Set up this CPU, first the APIC, which is probably redundant on
+	 * most boards.
 	 */
 	apic_ap_setup();
 
-	/* Save our processor parameters. */
+	/* Save the processor parameters. */
 	smp_store_cpu_info(cpuid);
 
 	/*
 	 * The topology information must be up to date before
 	 * notify_cpu_starting().
 	 */
-	set_cpu_sibling_map(raw_smp_processor_id());
+	set_cpu_sibling_map(cpuid);
 
 	ap_init_aperfmperf();
 
@@ -205,11 +196,6 @@ static void smp_callin(void)
 	 * state CPUHP_ONLINE.
 	 */
 	notify_cpu_starting(cpuid);
-
-	/*
-	 * Allow the master to continue.
-	 */
-	cpumask_set_cpu(cpuid, cpu_callin_mask);
 }
 
 static void ap_calibrate_delay(void)
@@ -268,12 +254,7 @@ static void notrace start_secondary(void
 	rcu_cpu_starting(raw_smp_processor_id());
 	x86_cpuinit.early_percpu_clock_init();
 
-	/*
-	 * Sync point with wait_cpu_callin(). The AP doesn't wait here
-	 * but just sets the bit to let the controlling CPU (BSP) know that
-	 * it's got this far.
-	 */
-	smp_callin();
+	ap_starting();
 
 	/* Check TSC synchronization with the control CPU. */
 	check_tsc_sync_target();
@@ -1109,7 +1090,7 @@ static int wait_cpu_cpumask(unsigned int
  * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
  * to proceed.  The AP will then proceed past setting its 'callin' bit
  * and end up waiting in check_tsc_sync_target() until we reach
- * do_wait_cpu_online() to tend to it.
+ * wait_cpu_online() to tend to it.
  */
 static int wait_cpu_initialized(unsigned int cpu)
 {
@@ -1124,20 +1105,7 @@ static int wait_cpu_initialized(unsigned
 }
 
 /*
- * Bringup step three: Wait for the target AP to reach smp_callin().
- * The AP is not waiting for us here so we don't need to parallelise
- * this step. Not entirely clear why we care about this, since we just
- * proceed directly to TSC synchronization which is the next sync
- * point with the AP anyway.
- */
-static void wait_cpu_callin(unsigned int cpu)
-{
-	while (!cpumask_test_cpu(cpu, cpu_callin_mask))
-		schedule();
-}
-
-/*
- * Bringup step four: Wait for the target AP to reach set_cpu_online() in
+ * Bringup step three: Wait for the target AP to reach set_cpu_online() in
  * start_secondary().
  */
 static void wait_cpu_online(unsigned int cpu)
@@ -1167,14 +1135,6 @@ static int native_kick_ap(unsigned int c
 	}
 
 	/*
-	 * Already booted CPU?
-	 */
-	if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
-		pr_debug("do_boot_cpu %d Already started\n", cpu);
-		return -ENOSYS;
-	}
-
-	/*
 	 * Save current MTRR state in case it was changed since early boot
 	 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
 	 */
@@ -1211,7 +1171,6 @@ int native_cpu_up(unsigned int cpu, stru
 	if (ret)
 		goto out;
 
-	wait_cpu_callin(cpu);
 	wait_cpu_online(cpu);
 
 out:
@@ -1327,7 +1286,6 @@ void __init smp_prepare_cpus_common(void
 	 * Setup boot CPU information
 	 */
 	smp_store_boot_cpu_info(); /* Final full version of the data */
-	cpumask_copy(cpu_callin_mask, cpumask_of(0));
 	mb();
 
 	for_each_possible_cpu(i) {
@@ -1542,7 +1500,6 @@ early_param("possible_cpus", _setup_poss
 void __init setup_cpu_local_masks(void)
 {
 	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
-	alloc_bootmem_cpumask_var(&cpu_callin_mask);
 	alloc_bootmem_cpumask_var(&cpu_callout_mask);
 	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
 }
@@ -1606,7 +1563,6 @@ static void remove_cpu_from_maps(int cpu
 {
 	set_cpu_online(cpu, false);
 	cpumask_clear_cpu(cpu, cpu_callout_mask);
-	cpumask_clear_cpu(cpu, cpu_callin_mask);
 	/* was set by cpu_init() */
 	cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	numa_remove_cpu(cpu);
[patch V4 15/37] cpu/hotplug: Rework sparse_irq locking in bringup_cpu()
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

There is no harm to hold sparse_irq lock until the upcoming CPU completes
in cpuhp_online_idle(). This allows to remove cpu_online() synchronization
from architecture code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
V4: Amend comment about sparse irq lock - Peter Z.
---
 kernel/cpu.c |   34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -558,7 +558,7 @@ static int cpuhp_kick_ap(int cpu, struct
 	return ret;
 }
 
-static int bringup_wait_for_ap(unsigned int cpu)
+static int bringup_wait_for_ap_online(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
@@ -579,15 +579,12 @@ static int bringup_wait_for_ap(unsigned
 	 */
 	if (!cpu_smt_allowed(cpu))
 		return -ECANCELED;
-
-	if (st->target <= CPUHP_AP_ONLINE_IDLE)
-		return 0;
-
-	return cpuhp_kick_ap(cpu, st, st->target);
+	return 0;
 }
 
 static int bringup_cpu(unsigned int cpu)
 {
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	struct task_struct *idle = idle_thread_get(cpu);
 	int ret;
 
@@ -600,16 +597,33 @@ static int bringup_cpu(unsigned int cpu)
 	/*
 	 * Some architectures have to walk the irq descriptors to
 	 * setup the vector space for the cpu which comes online.
-	 * Prevent irq alloc/free across the bringup.
+	 *
+	 * Prevent irq alloc/free across the bringup by acquiring the
+	 * sparse irq lock. Hold it until the upcoming CPU completes the
+	 * startup in cpuhp_online_idle() which allows to avoid
+	 * intermediate synchronization points in the architecture code.
 	 */
 	irq_lock_sparse();
 
 	/* Arch-specific enabling code. */
 	ret = __cpu_up(cpu, idle);
-	irq_unlock_sparse();
 	if (ret)
-		return ret;
-	return bringup_wait_for_ap(cpu);
+		goto out_unlock;
+
+	ret = bringup_wait_for_ap_online(cpu);
+	if (ret)
+		goto out_unlock;
+
+	irq_unlock_sparse();
+
+	if (st->target <= CPUHP_AP_ONLINE_IDLE)
+		return 0;
+
+	return cpuhp_kick_ap(cpu, st, st->target);
+
+out_unlock:
+	irq_unlock_sparse();
+	return ret;
 }
 
 static int finish_cpu(unsigned int cpu)
[patch V4 16/37] x86/smpboot: Remove wait for cpu_online()
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Now that the core code drops sparse_irq_lock after the idle thread
synchronized, it's pointless to wait for the AP to mark itself online.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/kernel/smpboot.c |   26 ++------------------------
 1 file changed, 2 insertions(+), 24 deletions(-)

--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -275,7 +275,6 @@ static void notrace start_secondary(void
 	 * half valid vector space.
 	 */
 	lock_vector_lock();
-	/* Sync point with do_wait_cpu_online() */
 	set_cpu_online(smp_processor_id(), true);
 	lapic_online();
 	unlock_vector_lock();
@@ -1104,20 +1103,6 @@ static int wait_cpu_initialized(unsigned
 	return 0;
 }
 
-/*
- * Bringup step three: Wait for the target AP to reach set_cpu_online() in
- * start_secondary().
- */
-static void wait_cpu_online(unsigned int cpu)
-{
-	/*
-	 * Wait for the AP to mark itself online, so the core caller
-	 * can drop sparse_irq_lock.
-	 */
-	while (!cpu_online(cpu))
-		schedule();
-}
-
 static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
@@ -1164,16 +1149,9 @@ int native_cpu_up(unsigned int cpu, stru
 	int ret;
 
 	ret = native_kick_ap(cpu, tidle);
-	if (ret)
-		goto out;
-
-	ret = wait_cpu_initialized(cpu);
-	if (ret)
-		goto out;
-
-	wait_cpu_online(cpu);
+	if (!ret)
+		ret = wait_cpu_initialized(cpu);
 
-out:
 	/* Cleanup possible dangling ends... */
 	if (x86_platform.legacy.warm_reset)
 		smpboot_restore_warm_reset_vector();
[patch V4 17/37] x86/xen/smp_pv: Remove wait for CPU online
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Now that the core code drops sparse_irq_lock after the idle thread
synchronized, it's pointless to wait for the AP to mark itself online.

Whether the control CPU runs in a wait loop or sleeps in the core code
waiting for the online operation to complete makes no difference.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/xen/smp_pv.c |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)


--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -340,11 +340,11 @@ static int xen_pv_cpu_up(unsigned int cp
 
 	xen_pmu_init(cpu);
 
-	rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
-	BUG_ON(rc);
-
-	while (cpu_report_state(cpu) != CPU_ONLINE)
-		HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
+	/*
+	 * Why is this a BUG? If the hypercall fails then everything can be
+	 * rolled back, no?
+	 */
+	BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL));
 
 	return 0;
 }
[patch V4 18/37] x86/xen/hvm: Get rid of DEAD_FROZEN handling
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

No point in this conditional voodoo. Un-initializing the lock mechanism is
safe to be called unconditionally even if it was already invoked when the
CPU died.

Remove the invocation of xen_smp_intr_free() as that has been already
cleaned up in xen_cpu_dead_hvm().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/xen/enlighten_hvm.c |   11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -161,13 +161,12 @@ static int xen_cpu_up_prepare_hvm(unsign
 	int rc = 0;
 
 	/*
-	 * This can happen if CPU was offlined earlier and
-	 * offlining timed out in common_cpu_die().
+	 * If a CPU was offlined earlier and offlining timed out then the
+	 * lock mechanism is still initialized. Uninit it unconditionally
+	 * as it's safe to call even if already uninited. Interrupts and
+	 * timer have already been handled in xen_cpu_dead_hvm().
 	 */
-	if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
-		xen_smp_intr_free(cpu);
-		xen_uninit_lock_cpu(cpu);
-	}
+	xen_uninit_lock_cpu(cpu);
 
 	if (cpu_acpi_id(cpu) != U32_MAX)
 		per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
[patch V4 19/37] cpu/hotplug: Add CPU state tracking and synchronization
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

The CPU state tracking and synchronization mechanism in smpboot.c is
completely independent of the hotplug code and all logic around it is
implemented in architecture specific code.

Except for the state reporting of the AP there is absolutely nothing
architecture specific and the synchronization and decision functions can be
moved into the generic hotplug core code.

Provide an integrated variant and add the core synchronization and decision
points. This comes in two flavours:

  1) DEAD state synchronization

     Updated by the architecture code once the AP reaches the point where
     it is ready to be torn down by the control CPU, e.g. by removing power
     or clocks or tear down via the hypervisor.

     The control CPU waits for this state to be reached with a timeout. If
     the state is reached an architecture specific cleanup function is
     invoked.

  2) Full state synchronization

     This extends #1 with AP alive synchronization. This is new
     functionality, which allows replacing architecture specific wait
     mechanisms, e.g. cpumasks, completely.

     It also prevents an AP which is in a limbo state from being brought
     up again. This can happen when an AP failed to report dead state
     during a previous off-line operation.

The dead synchronization is what most architectures use. Only x86 makes a
bringup decision based on that state at the moment.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
V4: Remove the try_cmpxchg() loop in cpuhp_ap_update_sync_state() - Peter Z.
---
 arch/Kconfig               |   15 +++
 include/linux/cpuhotplug.h |   12 ++
 kernel/cpu.c               |  193 ++++++++++++++++++++++++++++++++++++++++++++-
 kernel/smpboot.c           |    2 
 4 files changed, 221 insertions(+), 1 deletion(-)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -34,6 +34,21 @@ config ARCH_HAS_SUBPAGE_FAULTS
 config HOTPLUG_SMT
 	bool
 
+# Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL
+config HOTPLUG_CORE_SYNC
+	bool
+
+# Basic CPU dead synchronization selected by architecture
+config HOTPLUG_CORE_SYNC_DEAD
+	bool
+	select HOTPLUG_CORE_SYNC
+
+# Full CPU synchronization with alive state selected by architecture
+config HOTPLUG_CORE_SYNC_FULL
+	bool
+	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
+	select HOTPLUG_CORE_SYNC
+
 config GENERIC_ENTRY
 	bool
 
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -517,4 +517,16 @@ void cpuhp_online_idle(enum cpuhp_state
 static inline void cpuhp_online_idle(enum cpuhp_state state) { }
 #endif
 
+void cpuhp_ap_sync_alive(void);
+void arch_cpuhp_sync_state_poll(void);
+void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu);
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
+void cpuhp_ap_report_dead(void);
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu);
+#else
+static inline void cpuhp_ap_report_dead(void) { }
+static inline void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
+#endif
+
 #endif
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -17,6 +17,7 @@
 #include <linux/cpu.h>
 #include <linux/oom.h>
 #include <linux/rcupdate.h>
+#include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/bug.h>
 #include <linux/kthread.h>
@@ -59,6 +60,7 @@
  * @last:	For multi-instance rollback, remember how far we got
  * @cb_state:	The state for a single callback (install/uninstall)
  * @result:	Result of the operation
+ * @ap_sync_state:	State for AP synchronization
  * @done_up:	Signal completion to the issuer of the task for cpu-up
  * @done_down:	Signal completion to the issuer of the task for cpu-down
  */
@@ -76,6 +78,7 @@ struct cpuhp_cpu_state {
 	struct hlist_node	*last;
 	enum cpuhp_state	cb_state;
 	int			result;
+	atomic_t		ap_sync_state;
 	struct completion	done_up;
 	struct completion	done_down;
 #endif
@@ -276,6 +279,182 @@ static bool cpuhp_is_atomic_state(enum c
 	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
 }
 
+/* Synchronization state management */
+enum cpuhp_sync_state {
+	SYNC_STATE_DEAD,
+	SYNC_STATE_KICKED,
+	SYNC_STATE_SHOULD_DIE,
+	SYNC_STATE_ALIVE,
+	SYNC_STATE_SHOULD_ONLINE,
+	SYNC_STATE_ONLINE,
+};
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC
+/**
+ * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
+ * @state:	The synchronization state to set
+ *
+ * No synchronization point. Just update of the synchronization state, but implies
+ * a full barrier so that the AP changes are visible before the control CPU proceeds.
+ */
+static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
+{
+	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+
+	(void)atomic_xchg(st, state);
+}
+
+void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
+
+static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
+				      enum cpuhp_sync_state next_state)
+{
+	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+	ktime_t now, end, start = ktime_get();
+	int sync;
+
+	end = start + 10ULL * NSEC_PER_SEC;
+
+	sync = atomic_read(st);
+	while (1) {
+		if (sync == state) {
+			if (!atomic_try_cmpxchg(st, &sync, next_state))
+				continue;
+			return true;
+		}
+
+		now = ktime_get();
+		if (now > end) {
+			/* Timeout. Leave the state unchanged */
+			return false;
+		} else if (now - start < NSEC_PER_MSEC) {
+			/* Poll for one millisecond */
+			arch_cpuhp_sync_state_poll();
+		} else {
+			usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE);
+		}
+		sync = atomic_read(st);
+	}
+	return true;
+}
+#else  /* CONFIG_HOTPLUG_CORE_SYNC */
+static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
+/**
+ * cpuhp_ap_report_dead - Update synchronization state to DEAD
+ *
+ * No synchronization point. Just update of the synchronization state.
+ */
+void cpuhp_ap_report_dead(void)
+{
+	cpuhp_ap_update_sync_state(SYNC_STATE_DEAD);
+}
+
+void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
+
+/*
+ * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
+ * because the AP cannot issue complete() at this stage.
+ */
+static void cpuhp_bp_sync_dead(unsigned int cpu)
+{
+	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+	int sync = atomic_read(st);
+
+	do {
+		/* CPU can have reported dead already. Don't overwrite that! */
+		if (sync == SYNC_STATE_DEAD)
+			break;
+	} while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE));
+
+	if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) {
+		/* CPU reached dead state. Invoke the cleanup function */
+		arch_cpuhp_cleanup_dead_cpu(cpu);
+		return;
+	}
+
+	/* No further action possible. Emit message and give up. */
+	pr_err("CPU%u failed to report dead state\n", cpu);
+}
+#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */
+static inline void cpuhp_bp_sync_dead(unsigned int cpu) { }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL
+/**
+ * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
+ *
+ * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits
+ * for the BP to release it.
+ */
+void cpuhp_ap_sync_alive(void)
+{
+	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+
+	cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE);
+
+	/* Wait for the control CPU to release it. */
+	while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE)
+		cpu_relax();
+}
+
+static bool cpuhp_can_boot_ap(unsigned int cpu)
+{
+	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+	int sync = atomic_read(st);
+
+again:
+	switch (sync) {
+	case SYNC_STATE_DEAD:
+		/* CPU is properly dead */
+		break;
+	case SYNC_STATE_KICKED:
+		/* CPU did not come up in previous attempt */
+		break;
+	case SYNC_STATE_ALIVE:
+		/* CPU is stuck cpuhp_ap_sync_alive(). */
+		break;
+	default:
+		/* CPU failed to report online or dead and is in limbo state. */
+		return false;
+	}
+
+	/* Prepare for booting */
+	if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED))
+		goto again;
+
+	return true;
+}
+
+void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
+
+/*
+ * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
+ * because the AP cannot issue complete() so early in the bringup.
+ */
+static int cpuhp_bp_sync_alive(unsigned int cpu)
+{
+	int ret = 0;
+
+	if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL))
+		return 0;
+
+	if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) {
+		pr_err("CPU%u failed to report alive state\n", cpu);
+		ret = -EIO;
+	}
+
+	/* Let the architecture cleanup the kick alive mechanics. */
+	arch_cpuhp_cleanup_kick_cpu(cpu);
+	return ret;
+}
+#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */
+static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; }
+static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */
+
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 bool cpuhp_tasks_frozen;
@@ -588,6 +767,9 @@ static int bringup_cpu(unsigned int cpu)
 	struct task_struct *idle = idle_thread_get(cpu);
 	int ret;
 
+	if (!cpuhp_can_boot_ap(cpu))
+		return -EAGAIN;
+
 	/*
 	 * Reset stale stack state from the last time this CPU was online.
 	 */
@@ -610,6 +792,10 @@ static int bringup_cpu(unsigned int cpu)
 	if (ret)
 		goto out_unlock;
 
+	ret = cpuhp_bp_sync_alive(cpu);
+	if (ret)
+		goto out_unlock;
+
 	ret = bringup_wait_for_ap_online(cpu);
 	if (ret)
 		goto out_unlock;
@@ -1113,6 +1299,8 @@ static int takedown_cpu(unsigned int cpu
 	/* This actually kills the CPU. */
 	__cpu_die(cpu);
 
+	cpuhp_bp_sync_dead(cpu);
+
 	tick_cleanup_dead_cpu(cpu);
 	rcutree_migrate_callbacks(cpu);
 	return 0;
@@ -1359,8 +1547,10 @@ void cpuhp_online_idle(enum cpuhp_state
 	if (state != CPUHP_AP_ONLINE_IDLE)
 		return;
 
+	cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE);
+
 	/*
-	 * Unpart the stopper thread before we start the idle loop (and start
+	 * Unpark the stopper thread before we start the idle loop (and start
 	 * scheduling); this ensures the stopper task is always available.
 	 */
 	stop_machine_unpark(smp_processor_id());
@@ -2737,6 +2927,7 @@ void __init boot_cpu_hotplug_init(void)
 {
 #ifdef CONFIG_SMP
 	cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
+	atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE);
 #endif
 	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
 	this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -326,6 +326,7 @@ void smpboot_unregister_percpu_thread(st
 }
 EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
 
+#ifndef CONFIG_HOTPLUG_CORE_SYNC
 static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
 
 /*
@@ -488,3 +489,4 @@ bool cpu_report_death(void)
 }
 
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
[patch V4 20/37] x86/smpboot: Switch to hotplug core state synchronization
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

The new AP state tracking and synchronization mechanism in the CPU hotplug
core code allows to remove quite some x86 specific code:

  1) The AP alive synchronization based on cpumasks

  2) The decision whether an AP can be brought up again

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
V2: Use for_each_online_cpu() - Brian
---
 arch/x86/Kconfig           |    1 
 arch/x86/include/asm/smp.h |    7 +
 arch/x86/kernel/smp.c      |    1 
 arch/x86/kernel/smpboot.c  |  165 +++++++++++----------------------------------
 arch/x86/xen/smp_hvm.c     |   16 +---
 arch/x86/xen/smp_pv.c      |   39 ++++++----
 6 files changed, 75 insertions(+), 154 deletions(-)


--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -274,6 +274,7 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_USER_RETURN_NOTIFIER
 	select HAVE_GENERIC_VDSO
+	select HOTPLUG_CORE_SYNC_FULL		if SMP
 	select HOTPLUG_SMT			if SMP
 	select IRQ_FORCED_THREADING
 	select NEED_PER_CPU_EMBED_FIRST_CHUNK
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -38,6 +38,8 @@ struct smp_ops {
 	void (*crash_stop_other_cpus)(void);
 	void (*smp_send_reschedule)(int cpu);
 
+	void (*cleanup_dead_cpu)(unsigned cpu);
+	void (*poll_sync_state)(void);
 	int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
 	int (*cpu_disable)(void);
 	void (*cpu_die)(unsigned int cpu);
@@ -90,7 +92,8 @@ static inline int __cpu_disable(void)
 
 static inline void __cpu_die(unsigned int cpu)
 {
-	smp_ops.cpu_die(cpu);
+	if (smp_ops.cpu_die)
+		smp_ops.cpu_die(cpu);
 }
 
 static inline void __noreturn play_dead(void)
@@ -123,8 +126,6 @@ void native_smp_cpus_done(unsigned int m
 int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
-int common_cpu_die(unsigned int cpu);
-void native_cpu_die(unsigned int cpu);
 void __noreturn hlt_play_dead(void);
 void native_play_dead(void);
 void play_dead_common(void);
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -269,7 +269,6 @@ struct smp_ops smp_ops = {
 	.smp_send_reschedule	= native_smp_send_reschedule,
 
 	.cpu_up			= native_cpu_up,
-	.cpu_die		= native_cpu_die,
 	.cpu_disable		= native_cpu_disable,
 	.play_dead		= native_play_dead,
 
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -57,6 +57,7 @@
 #include <linux/pgtable.h>
 #include <linux/overflow.h>
 #include <linux/stackprotector.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/acpi.h>
 #include <asm/cacheinfo.h>
@@ -101,9 +102,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
-/* All of these masks are initialized in setup_cpu_local_masks() */
-static cpumask_var_t cpu_initialized_mask;
-static cpumask_var_t cpu_callout_mask;
 /* Representing CPUs for which sibling maps can be computed */
 static cpumask_var_t cpu_sibling_setup_mask;
 
@@ -166,10 +164,10 @@ static void ap_starting(void)
 	int cpuid = smp_processor_id();
 
 	/*
-	 * If woken up by an INIT in an 82489DX configuration
-	 * cpu_callout_mask guarantees the CPU does not reach this point
-	 * before an INIT_deassert IPI reaches the local APIC, so it is now
-	 * safe to touch the local APIC.
+	 * If woken up by an INIT in an 82489DX configuration the alive
+	 * synchronization guarantees that the CPU does not reach this
+	 * point before an INIT_deassert IPI reaches the local APIC, so it
+	 * is now safe to touch the local APIC.
 	 *
 	 * Set up this CPU, first the APIC, which is probably redundant on
 	 * most boards.
@@ -213,17 +211,6 @@ static void ap_calibrate_delay(void)
 	cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
 }
 
-static void wait_for_master_cpu(int cpu)
-{
-	/*
-	 * Wait for release by control CPU before continuing with AP
-	 * initialization.
-	 */
-	WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
-	while (!cpumask_test_cpu(cpu, cpu_callout_mask))
-		cpu_relax();
-}
-
 /*
  * Activate a secondary processor.
  */
@@ -244,11 +231,11 @@ static void notrace start_secondary(void
 	cpu_init_exception_handling();
 
 	/*
-	 * Sync point with wait_cpu_initialized(). Sets AP in
-	 * cpu_initialized_mask and then waits for the control CPU
-	 * to release it.
+	 * Synchronization point with the hotplug core. Sets the
+	 * synchronization state to ALIVE and waits for the control CPU to
+	 * release this CPU for further bringup.
 	 */
-	wait_for_master_cpu(raw_smp_processor_id());
+	cpuhp_ap_sync_alive();
 
 	cpu_init();
 	rcu_cpu_starting(raw_smp_processor_id());
@@ -278,7 +265,6 @@ static void notrace start_secondary(void
 	set_cpu_online(smp_processor_id(), true);
 	lapic_online();
 	unlock_vector_lock();
-	cpu_set_state_online(smp_processor_id());
 	x86_platform.nmi_init();
 
 	/* enable local interrupts */
@@ -729,9 +715,9 @@ static void impress_friends(void)
 	 * Allow the user to impress friends.
 	 */
 	pr_debug("Before bogomips\n");
-	for_each_possible_cpu(cpu)
-		if (cpumask_test_cpu(cpu, cpu_callout_mask))
-			bogosum += cpu_data(cpu).loops_per_jiffy;
+	for_each_online_cpu(cpu)
+		bogosum += cpu_data(cpu).loops_per_jiffy;
+
 	pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
 		num_online_cpus(),
 		bogosum/(500000/HZ),
@@ -1003,6 +989,7 @@ int common_cpu_up(unsigned int cpu, stru
 static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 {
 	unsigned long start_ip = real_mode_header->trampoline_start;
+	int ret;
 
 #ifdef CONFIG_X86_64
 	/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
@@ -1043,13 +1030,6 @@ static int do_boot_cpu(int apicid, int c
 		}
 	}
 
-	/*
-	 * AP might wait on cpu_callout_mask in cpu_init() with
-	 * cpu_initialized_mask set if previous attempt to online
-	 * it timed-out. Clear cpu_initialized_mask so that after
-	 * INIT/SIPI it could start with a clean state.
-	 */
-	cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	smp_mb();
 
 	/*
@@ -1060,47 +1040,16 @@ static int do_boot_cpu(int apicid, int c
 	 * - Use an INIT boot APIC message
 	 */
 	if (apic->wakeup_secondary_cpu_64)
-		return apic->wakeup_secondary_cpu_64(apicid, start_ip);
+		ret = apic->wakeup_secondary_cpu_64(apicid, start_ip);
 	else if (apic->wakeup_secondary_cpu)
-		return apic->wakeup_secondary_cpu(apicid, start_ip);
-
-	return wakeup_secondary_cpu_via_init(apicid, start_ip);
-}
-
-static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
-{
-	unsigned long timeout;
-
-	/*
-	 * Wait up to 10s for the CPU to report in.
-	 */
-	timeout = jiffies + 10*HZ;
-	while (time_before(jiffies, timeout)) {
-		if (cpumask_test_cpu(cpu, mask))
-			return 0;
-
-		schedule();
-	}
-	return -1;
-}
-
-/*
- * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
- * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
- * to proceed.  The AP will then proceed past setting its 'callin' bit
- * and end up waiting in check_tsc_sync_target() until we reach
- * wait_cpu_online() to tend to it.
- */
-static int wait_cpu_initialized(unsigned int cpu)
-{
-	/*
-	 * Wait for first sign of life from AP.
-	 */
-	if (wait_cpu_cpumask(cpu, cpu_initialized_mask))
-		return -1;
+		ret = apic->wakeup_secondary_cpu(apicid, start_ip);
+	else
+		ret = wakeup_secondary_cpu_via_init(apicid, start_ip);
 
-	cpumask_set_cpu(cpu, cpu_callout_mask);
-	return 0;
+	/* If the wakeup mechanism failed, cleanup the warm reset vector */
+	if (ret)
+		arch_cpuhp_cleanup_kick_cpu(cpu);
+	return ret;
 }
 
 static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
@@ -1125,11 +1074,6 @@ static int native_kick_ap(unsigned int c
 	 */
 	mtrr_save_state();
 
-	/* x86 CPUs take themselves offline, so delayed offline is OK. */
-	err = cpu_check_up_prepare(cpu);
-	if (err && err != -EBUSY)
-		return err;
-
 	/* the FPU context is blank, nobody can own it */
 	per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
 
@@ -1146,17 +1090,29 @@ static int native_kick_ap(unsigned int c
 
 int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
-	int ret;
-
-	ret = native_kick_ap(cpu, tidle);
-	if (!ret)
-		ret = wait_cpu_initialized(cpu);
+	return native_kick_ap(cpu, tidle);
+}
 
+void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu)
+{
 	/* Cleanup possible dangling ends... */
-	if (x86_platform.legacy.warm_reset)
+	if (smp_ops.cpu_up == native_cpu_up && x86_platform.legacy.warm_reset)
 		smpboot_restore_warm_reset_vector();
+}
 
-	return ret;
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
+	if (smp_ops.cleanup_dead_cpu)
+		smp_ops.cleanup_dead_cpu(cpu);
+
+	if (system_state == SYSTEM_RUNNING)
+		pr_info("CPU %u is now offline\n", cpu);
+}
+
+void arch_cpuhp_sync_state_poll(void)
+{
+	if (smp_ops.poll_sync_state)
+		smp_ops.poll_sync_state();
 }
 
 /**
@@ -1348,9 +1304,6 @@ void __init native_smp_prepare_boot_cpu(
 	if (!IS_ENABLED(CONFIG_SMP))
 		switch_gdt_and_percpu_base(me);
 
-	/* already set me in cpu_online_mask in boot_cpu_init() */
-	cpumask_set_cpu(me, cpu_callout_mask);
-	cpu_set_state_online(me);
 	native_pv_lock_init();
 }
 
@@ -1477,8 +1430,6 @@ early_param("possible_cpus", _setup_poss
 /* correctly size the local cpu masks */
 void __init setup_cpu_local_masks(void)
 {
-	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
-	alloc_bootmem_cpumask_var(&cpu_callout_mask);
 	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
 }
 
@@ -1540,9 +1491,6 @@ static void remove_siblinginfo(int cpu)
 static void remove_cpu_from_maps(int cpu)
 {
 	set_cpu_online(cpu, false);
-	cpumask_clear_cpu(cpu, cpu_callout_mask);
-	/* was set by cpu_init() */
-	cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	numa_remove_cpu(cpu);
 }
 
@@ -1593,36 +1541,11 @@ int native_cpu_disable(void)
 	return 0;
 }
 
-int common_cpu_die(unsigned int cpu)
-{
-	int ret = 0;
-
-	/* We don't do anything here: idle task is faking death itself. */
-
-	/* They ack this in play_dead() by setting CPU_DEAD */
-	if (cpu_wait_death(cpu, 5)) {
-		if (system_state == SYSTEM_RUNNING)
-			pr_info("CPU %u is now offline\n", cpu);
-	} else {
-		pr_err("CPU %u didn't die...\n", cpu);
-		ret = -1;
-	}
-
-	return ret;
-}
-
-void native_cpu_die(unsigned int cpu)
-{
-	common_cpu_die(cpu);
-}
-
 void play_dead_common(void)
 {
 	idle_task_exit();
 
-	/* Ack it */
-	(void)cpu_report_death();
-
+	cpuhp_ap_report_dead();
 	/*
 	 * With physical CPU hotplug, we should halt the cpu
 	 */
@@ -1724,12 +1647,6 @@ int native_cpu_disable(void)
 	return -ENOSYS;
 }
 
-void native_cpu_die(unsigned int cpu)
-{
-	/* We said "no" in __cpu_disable */
-	BUG();
-}
-
 void native_play_dead(void)
 {
 	BUG();
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -55,18 +55,16 @@ static void __init xen_hvm_smp_prepare_c
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void xen_hvm_cpu_die(unsigned int cpu)
+static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
 {
-	if (common_cpu_die(cpu) == 0) {
-		if (xen_have_vector_callback) {
-			xen_smp_intr_free(cpu);
-			xen_uninit_lock_cpu(cpu);
-			xen_teardown_timer(cpu);
-		}
+	if (xen_have_vector_callback) {
+		xen_smp_intr_free(cpu);
+		xen_uninit_lock_cpu(cpu);
+		xen_teardown_timer(cpu);
 	}
 }
 #else
-static void xen_hvm_cpu_die(unsigned int cpu)
+static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
 {
 	BUG();
 }
@@ -77,7 +75,7 @@ void __init xen_hvm_smp_init(void)
 	smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
 	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
 	smp_ops.smp_cpus_done = xen_smp_cpus_done;
-	smp_ops.cpu_die = xen_hvm_cpu_die;
+	smp_ops.cleanup_dead_cpu = xen_hvm_cleanup_dead_cpu;
 
 	if (!xen_have_vector_callback) {
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -62,6 +62,7 @@ static void cpu_bringup(void)
 	int cpu;
 
 	cr4_init();
+	cpuhp_ap_sync_alive();
 	cpu_init();
 	touch_softlockup_watchdog();
 
@@ -83,7 +84,7 @@ static void cpu_bringup(void)
 
 	set_cpu_online(cpu, true);
 
-	cpu_set_state_online(cpu);  /* Implies full memory barrier. */
+	smp_mb();
 
 	/* We can take interrupts now: we're officially "up". */
 	local_irq_enable();
@@ -323,14 +324,6 @@ static int xen_pv_cpu_up(unsigned int cp
 
 	xen_setup_runstate_info(cpu);
 
-	/*
-	 * PV VCPUs are always successfully taken down (see 'while' loop
-	 * in xen_cpu_die()), so -EBUSY is an error.
-	 */
-	rc = cpu_check_up_prepare(cpu);
-	if (rc)
-		return rc;
-
 	/* make sure interrupts start blocked */
 	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
 
@@ -349,6 +342,11 @@ static int xen_pv_cpu_up(unsigned int cp
 	return 0;
 }
 
+static void xen_pv_poll_sync_state(void)
+{
+	HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 static int xen_pv_cpu_disable(void)
 {
@@ -364,18 +362,18 @@ static int xen_pv_cpu_disable(void)
 
 static void xen_pv_cpu_die(unsigned int cpu)
 {
-	while (HYPERVISOR_vcpu_op(VCPUOP_is_up,
-				  xen_vcpu_nr(cpu), NULL)) {
+	while (HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), NULL)) {
 		__set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_timeout(HZ/10);
 	}
+}
 
-	if (common_cpu_die(cpu) == 0) {
-		xen_smp_intr_free(cpu);
-		xen_uninit_lock_cpu(cpu);
-		xen_teardown_timer(cpu);
-		xen_pmu_finish(cpu);
-	}
+static void xen_pv_cleanup_dead_cpu(unsigned int cpu)
+{
+	xen_smp_intr_free(cpu);
+	xen_uninit_lock_cpu(cpu);
+	xen_teardown_timer(cpu);
+	xen_pmu_finish(cpu);
 }
 
 static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
@@ -397,6 +395,11 @@ static void xen_pv_cpu_die(unsigned int
 	BUG();
 }
 
+static void xen_pv_cleanup_dead_cpu(unsigned int cpu)
+{
+	BUG();
+}
+
 static void __noreturn xen_pv_play_dead(void)
 {
 	BUG();
@@ -437,6 +440,8 @@ static const struct smp_ops xen_smp_ops
 
 	.cpu_up = xen_pv_cpu_up,
 	.cpu_die = xen_pv_cpu_die,
+	.cleanup_dead_cpu = xen_pv_cleanup_dead_cpu,
+	.poll_sync_state = xen_pv_poll_sync_state,
 	.cpu_disable = xen_pv_cpu_disable,
 	.play_dead = xen_pv_play_dead,
[patch V4 21/37] cpu/hotplug: Remove cpu_report_state() and related unused cruft
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

No more users.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 include/linux/cpu.h |    2 -
 kernel/smpboot.c    |   90 ----------------------------------------------------
 2 files changed, 92 deletions(-)


--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -184,8 +184,6 @@ void arch_cpu_idle_enter(void);
 void arch_cpu_idle_exit(void);
 void __noreturn arch_cpu_idle_dead(void);
 
-int cpu_report_state(int cpu);
-int cpu_check_up_prepare(int cpu);
 void cpu_set_state_online(int cpu);
 void play_idle_precise(u64 duration_ns, u64 latency_ns);
 
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -329,97 +329,7 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_per
 #ifndef CONFIG_HOTPLUG_CORE_SYNC
 static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
 
-/*
- * Called to poll specified CPU's state, for example, when waiting for
- * a CPU to come online.
- */
-int cpu_report_state(int cpu)
-{
-	return atomic_read(&per_cpu(cpu_hotplug_state, cpu));
-}
-
-/*
- * If CPU has died properly, set its state to CPU_UP_PREPARE and
- * return success.  Otherwise, return -EBUSY if the CPU died after
- * cpu_wait_death() timed out.  And yet otherwise again, return -EAGAIN
- * if cpu_wait_death() timed out and the CPU still hasn't gotten around
- * to dying.  In the latter two cases, the CPU might not be set up
- * properly, but it is up to the arch-specific code to decide.
- * Finally, -EIO indicates an unanticipated problem.
- *
- * Note that it is permissible to omit this call entirely, as is
- * done in architectures that do no CPU-hotplug error checking.
- */
-int cpu_check_up_prepare(int cpu)
-{
-	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
-		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
-		return 0;
-	}
-
-	switch (atomic_read(&per_cpu(cpu_hotplug_state, cpu))) {
-
-	case CPU_POST_DEAD:
-
-		/* The CPU died properly, so just start it up again. */
-		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
-		return 0;
-
-	case CPU_DEAD_FROZEN:
-
-		/*
-		 * Timeout during CPU death, so let caller know.
-		 * The outgoing CPU completed its processing, but after
-		 * cpu_wait_death() timed out and reported the error. The
-		 * caller is free to proceed, in which case the state
-		 * will be reset properly by cpu_set_state_online().
-		 * Proceeding despite this -EBUSY return makes sense
-		 * for systems where the outgoing CPUs take themselves
-		 * offline, with no post-death manipulation required from
-		 * a surviving CPU.
-		 */
-		return -EBUSY;
-
-	case CPU_BROKEN:
-
-		/*
-		 * The most likely reason we got here is that there was
-		 * a timeout during CPU death, and the outgoing CPU never
-		 * did complete its processing.  This could happen on
-		 * a virtualized system if the outgoing VCPU gets preempted
-		 * for more than five seconds, and the user attempts to
-		 * immediately online that same CPU.  Trying again later
-		 * might return -EBUSY above, hence -EAGAIN.
-		 */
-		return -EAGAIN;
-
-	case CPU_UP_PREPARE:
-		/*
-		 * Timeout while waiting for the CPU to show up. Allow to try
-		 * again later.
-		 */
-		return 0;
-
-	default:
-
-		/* Should not happen.  Famous last words. */
-		return -EIO;
-	}
-}
-
-/*
- * Mark the specified CPU online.
- *
- * Note that it is permissible to omit this call entirely, as is
- * done in architectures that do no CPU-hotplug error checking.
- */
-void cpu_set_state_online(int cpu)
-{
-	(void)atomic_xchg(&per_cpu(cpu_hotplug_state, cpu), CPU_ONLINE);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
-
 /*
  * Wait for the specified CPU to exit the idle loop and die.
  */
[patch V4 22/37] ARM: smp: Switch to hotplug core state synchronization
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Switch to the CPU hotplug core state tracking and synchronization
mechanism. No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/arm/Kconfig           |    1 +
 arch/arm/include/asm/smp.h |    2 +-
 arch/arm/kernel/smp.c      |   18 +++++++-----------
 3 files changed, 9 insertions(+), 12 deletions(-)
---

--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -124,6 +124,7 @@ config ARM
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UID16
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN
+	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_REL
 	select NEED_DMA_MAP_STATE
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -64,7 +64,7 @@ extern void secondary_startup_arm(void);
 
 extern int __cpu_disable(void);
 
-extern void __cpu_die(unsigned int cpu);
+static inline void __cpu_die(unsigned int cpu) { }
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -288,15 +288,11 @@ int __cpu_disable(void)
 }
 
 /*
- * called on the thread which is asking for a CPU to be shutdown -
- * waits until shutdown has completed, or it is timed out.
+ * called on the thread which is asking for a CPU to be shutdown after the
+ * shutdown completed.
  */
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 {
-	if (!cpu_wait_death(cpu, 5)) {
-		pr_err("CPU%u: cpu didn't die\n", cpu);
-		return;
-	}
 	pr_debug("CPU%u: shutdown\n", cpu);
 
 	clear_tasks_mm_cpumask(cpu);
@@ -336,11 +332,11 @@ void __noreturn arch_cpu_idle_dead(void)
 	flush_cache_louis();
 
 	/*
-	 * Tell __cpu_die() that this CPU is now safe to dispose of.  Once
-	 * this returns, power and/or clocks can be removed at any point
-	 * from this CPU and its cache by platform_cpu_kill().
+	 * Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose
+	 * of. Once this returns, power and/or clocks can be removed at
+	 * any point from this CPU and its cache by platform_cpu_kill().
 	 */
-	(void)cpu_report_death();
+	cpuhp_ap_report_dead();
 
 	/*
 	 * Ensure that the cache lines associated with that completion are
[patch V4 23/37] arm64: smp: Switch to hotplug core state synchronization
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Switch to the CPU hotplug core state tracking and synchronization
mechanism. No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/arm64/Kconfig           |    1 +
 arch/arm64/include/asm/smp.h |    2 +-
 arch/arm64/kernel/smp.c      |   14 +++++---------
 3 files changed, 7 insertions(+), 10 deletions(-)


--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -222,6 +222,7 @@ config ARM64
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_GENERIC_VDSO
+	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select KASAN_VMALLOC if KASAN
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -99,7 +99,7 @@ static inline void arch_send_wakeup_ipi_
 
 extern int __cpu_disable(void);
 
-extern void __cpu_die(unsigned int cpu);
+static inline void __cpu_die(unsigned int cpu) { }
 extern void __noreturn cpu_die(void);
 extern void __noreturn cpu_die_early(void);
 
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -332,17 +332,13 @@ static int op_cpu_kill(unsigned int cpu)
 }
 
 /*
- * called on the thread which is asking for a CPU to be shutdown -
- * waits until shutdown has completed, or it is timed out.
+ * Called on the thread which is asking for a CPU to be shutdown after the
+ * shutdown completed.
  */
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 {
 	int err;
 
-	if (!cpu_wait_death(cpu, 5)) {
-		pr_crit("CPU%u: cpu didn't die\n", cpu);
-		return;
-	}
 	pr_debug("CPU%u: shutdown\n", cpu);
 
 	/*
@@ -369,8 +365,8 @@ void __noreturn cpu_die(void)
 
 	local_daif_mask();
 
-	/* Tell __cpu_die() that this CPU is now safe to dispose of */
-	(void)cpu_report_death();
+	/* Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose of */
+	cpuhp_ap_report_dead();
 
 	/*
 	 * Actually shutdown the CPU. This must never fail. The specific hotplug
[patch V4 24/37] csky/smp: Switch to hotplug core state synchronization
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Switch to the CPU hotplug core state tracking and synchronization
mechanism. No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/csky/Kconfig           |    1 +
 arch/csky/include/asm/smp.h |    2 +-
 arch/csky/kernel/smp.c      |    8 ++------
 3 files changed, 4 insertions(+), 7 deletions(-)


--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -96,6 +96,7 @@ config CSKY
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
+	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select MAY_HAVE_SPARSE_IRQ
 	select MODULES_USE_ELF_RELA if MODULES
 	select OF
--- a/arch/csky/include/asm/smp.h
+++ b/arch/csky/include/asm/smp.h
@@ -23,7 +23,7 @@ void __init set_send_ipi(void (*func)(co
 
 int __cpu_disable(void);
 
-void __cpu_die(unsigned int cpu);
+static inline void __cpu_die(unsigned int cpu) { }
 
 #endif /* CONFIG_SMP */
 
--- a/arch/csky/kernel/smp.c
+++ b/arch/csky/kernel/smp.c
@@ -291,12 +291,8 @@ int __cpu_disable(void)
 	return 0;
 }
 
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 {
-	if (!cpu_wait_death(cpu, 5)) {
-		pr_crit("CPU%u: shutdown failed\n", cpu);
-		return;
-	}
 	pr_notice("CPU%u: shutdown\n", cpu);
 }
 
@@ -304,7 +300,7 @@ void __noreturn arch_cpu_idle_dead(void)
 {
 	idle_task_exit();
 
-	cpu_report_death();
+	cpuhp_ap_report_dead();
 
 	while (!secondary_stack)
 		arch_cpu_idle();
[patch V4 25/37] MIPS: SMP_CPS: Switch to hotplug core state synchronization
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Switch to the CPU hotplug core state tracking and synchronization
mechanism. This unfortunately requires adding dead reporting to the non CPS
platforms as CPS is the only user, but it allows an overall consolidation
of this functionality.

No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/mips/Kconfig               |    1 +
 arch/mips/cavium-octeon/smp.c   |    1 +
 arch/mips/include/asm/smp-ops.h |    1 +
 arch/mips/kernel/smp-bmips.c    |    1 +
 arch/mips/kernel/smp-cps.c      |   14 +++++---------
 arch/mips/kernel/smp.c          |    8 ++++++++
 arch/mips/loongson64/smp.c      |    1 +
 7 files changed, 18 insertions(+), 9 deletions(-)


--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2285,6 +2285,7 @@ config MIPS_CPS
 	select MIPS_CM
 	select MIPS_CPS_PM if HOTPLUG_CPU
 	select SMP
+	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
 	select SYS_SUPPORTS_HOTPLUG_CPU
 	select SYS_SUPPORTS_SCHED_SMT if CPU_MIPSR6
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -345,6 +345,7 @@ void play_dead(void)
 	int cpu = cpu_number_map(cvmx_get_core_num());
 
 	idle_task_exit();
+	cpuhp_ap_report_dead();
 	octeon_processor_boot = 0xff;
 	per_cpu(cpu_state, cpu) = CPU_DEAD;
 
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h
@@ -33,6 +33,7 @@ struct plat_smp_ops {
 #ifdef CONFIG_HOTPLUG_CPU
 	int (*cpu_disable)(void);
 	void (*cpu_die)(unsigned int cpu);
+	void (*cleanup_dead_cpu)(unsigned cpu);
 #endif
 #ifdef CONFIG_KEXEC
 	void (*kexec_nonboot_cpu)(void);
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -392,6 +392,7 @@ static void bmips_cpu_die(unsigned int c
 void __ref play_dead(void)
 {
 	idle_task_exit();
+	cpuhp_ap_report_dead();
 
 	/* flush data cache */
 	_dma_cache_wback_inv(0, ~0);
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -503,8 +503,7 @@ void play_dead(void)
 		}
 	}
 
-	/* This CPU has chosen its way out */
-	(void)cpu_report_death();
+	cpuhp_ap_report_dead();
 
 	cps_shutdown_this_cpu(cpu_death);
 
@@ -527,7 +526,9 @@ static void wait_for_sibling_halt(void *
 	} while (!(halted & TCHALT_H));
 }
 
-static void cps_cpu_die(unsigned int cpu)
+static void cps_cpu_die(unsigned int cpu) { }
+
+static void cps_cleanup_dead_cpu(unsigned cpu)
 {
 	unsigned core = cpu_core(&cpu_data[cpu]);
 	unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]);
@@ -535,12 +536,6 @@ static void cps_cpu_die(unsigned int cpu
 	unsigned stat;
 	int err;
 
-	/* Wait for the cpu to choose its way out */
-	if (!cpu_wait_death(cpu, 5)) {
-		pr_err("CPU%u: didn't offline\n", cpu);
-		return;
-	}
-
 	/*
 	 * Now wait for the CPU to actually offline. Without doing this that
 	 * offlining may race with one or more of:
@@ -624,6 +619,7 @@ static const struct plat_smp_ops cps_smp
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_disable		= cps_cpu_disable,
 	.cpu_die		= cps_cpu_die,
+	.cleanup_dead_cpu	= cps_cleanup_dead_cpu,
 #endif
 #ifdef CONFIG_KEXEC
 	.kexec_nonboot_cpu	= cps_kexec_nonboot_cpu,
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -690,6 +690,14 @@ void flush_tlb_one(unsigned long vaddr)
 EXPORT_SYMBOL(flush_tlb_page);
 EXPORT_SYMBOL(flush_tlb_one);
 
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
+	if (mp_ops->cleanup_dead_cpu)
+		mp_ops->cleanup_dead_cpu(cpu);
+}
+#endif
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 
 static void tick_broadcast_callee(void *info)
--- a/arch/mips/loongson64/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -775,6 +775,7 @@ void play_dead(void)
 	void (*play_dead_at_ckseg1)(int *);
 
 	idle_task_exit();
+	cpuhp_ap_report_dead();
 
 	prid_imp = read_c0_prid() & PRID_IMP_MASK;
 	prid_rev = read_c0_prid() & PRID_REV_MASK;
[patch V4 26/37] parisc: Switch to hotplug core state synchronization
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Switch to the CPU hotplug core state tracking and synchronization
mechanism. No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/parisc/Kconfig          |    1 +
 arch/parisc/kernel/process.c |    4 ++--
 arch/parisc/kernel/smp.c     |    7 +++----
 3 files changed, 6 insertions(+), 6 deletions(-)


--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -57,6 +57,7 @@ config PARISC
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_IRQ_MIGRATION if SMP
 	select HAVE_UNSTABLE_SCHED_CLOCK if SMP
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -166,8 +166,8 @@ void __noreturn arch_cpu_idle_dead(void)
 
 	local_irq_disable();
 
-	/* Tell __cpu_die() that this CPU is now safe to dispose of. */
-	(void)cpu_report_death();
+	/* Tell the core that this CPU is now safe to dispose of. */
+	cpuhp_ap_report_dead();
 
 	/* Ensure that the cache lines are written out. */
 	flush_cache_all_local();
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -500,11 +500,10 @@ int __cpu_disable(void)
 void __cpu_die(unsigned int cpu)
 {
 	pdc_cpu_rendezvous_lock();
+}
 
-	if (!cpu_wait_death(cpu, 5)) {
-		pr_crit("CPU%u: cpu didn't die\n", cpu);
-		return;
-	}
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
 	pr_info("CPU%u: is shutting down\n", cpu);
 
 	/* set task's state to interruptible sleep */
[patch V4 27/37] riscv: Switch to hotplug core state synchronization
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Switch to the CPU hotplug core state tracking and synchronization
mechanism. No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig              |    1 +
 arch/riscv/include/asm/smp.h    |    2 +-
 arch/riscv/kernel/cpu-hotplug.c |   14 +++++++-------
 3 files changed, 9 insertions(+), 8 deletions(-)
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -122,6 +122,7 @@ config RISCV
 	select HAVE_RSEQ
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
+	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select KASAN_VMALLOC if KASAN
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -70,7 +70,7 @@ asmlinkage void smp_callin(void);
 
 #if defined CONFIG_HOTPLUG_CPU
 int __cpu_disable(void);
-void __cpu_die(unsigned int cpu);
+static inline void __cpu_die(unsigned int cpu) { }
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #else
--- a/arch/riscv/kernel/cpu-hotplug.c
+++ b/arch/riscv/kernel/cpu-hotplug.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/err.h>
 #include <linux/irq.h>
+#include <linux/cpuhotplug.h>
 #include <linux/cpu.h>
 #include <linux/sched/hotplug.h>
 #include <asm/irq.h>
@@ -49,17 +50,15 @@ int __cpu_disable(void)
 	return ret;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
- * Called on the thread which is asking for a CPU to be shutdown.
+ * Called on the thread which is asking for a CPU to be shutdown, if the
+ * CPU reported dead to the hotplug core.
  */
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 {
 	int ret = 0;
 
-	if (!cpu_wait_death(cpu, 5)) {
-		pr_err("CPU %u: didn't die\n", cpu);
-		return;
-	}
 	pr_notice("CPU%u: off\n", cpu);
 
 	/* Verify from the firmware if the cpu is really stopped*/
@@ -76,9 +75,10 @@ void __noreturn arch_cpu_idle_dead(void)
 {
 	idle_task_exit();
 
-	(void)cpu_report_death();
+	cpuhp_ap_report_dead();
 
 	cpu_ops[smp_processor_id()]->cpu_stop();
 	/* It should never reach here */
 	BUG();
 }
+#endif
[patch V4 28/37] cpu/hotplug: Remove unused state functions
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

All users converted to the hotplug core mechanism.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 include/linux/cpu.h |    2 -
 kernel/smpboot.c    |   75 ----------------------------------------------------
 2 files changed, 77 deletions(-)


--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -193,8 +193,6 @@ static inline void play_idle(unsigned lo
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-bool cpu_wait_death(unsigned int cpu, int seconds);
-bool cpu_report_death(void);
 void cpuhp_report_idle_dead(void);
 #else
 static inline void cpuhp_report_idle_dead(void) { }
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -325,78 +325,3 @@ void smpboot_unregister_percpu_thread(st
 	cpus_read_unlock();
 }
 EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
-
-#ifndef CONFIG_HOTPLUG_CORE_SYNC
-static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * Wait for the specified CPU to exit the idle loop and die.
- */
-bool cpu_wait_death(unsigned int cpu, int seconds)
-{
-	int jf_left = seconds * HZ;
-	int oldstate;
-	bool ret = true;
-	int sleep_jf = 1;
-
-	might_sleep();
-
-	/* The outgoing CPU will normally get done quite quickly. */
-	if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD)
-		goto update_state_early;
-	udelay(5);
-
-	/* But if the outgoing CPU dawdles, wait increasingly long times. */
-	while (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) != CPU_DEAD) {
-		schedule_timeout_uninterruptible(sleep_jf);
-		jf_left -= sleep_jf;
-		if (jf_left <= 0)
-			break;
-		sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10);
-	}
-update_state_early:
-	oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
-update_state:
-	if (oldstate == CPU_DEAD) {
-		/* Outgoing CPU died normally, update state. */
-		smp_mb(); /* atomic_read() before update. */
-		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD);
-	} else {
-		/* Outgoing CPU still hasn't died, set state accordingly. */
-		if (!atomic_try_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
-					&oldstate, CPU_BROKEN))
-			goto update_state;
-		ret = false;
-	}
-	return ret;
-}
-
-/*
- * Called by the outgoing CPU to report its successful death.  Return
- * false if this report follows the surviving CPU's timing out.
- *
- * A separate "CPU_DEAD_FROZEN" is used when the surviving CPU
- * timed out.  This approach allows architectures to omit calls to
- * cpu_check_up_prepare() and cpu_set_state_online() without defeating
- * the next cpu_wait_death()'s polling loop.
- */
-bool cpu_report_death(void)
-{
-	int oldstate;
-	int newstate;
-	int cpu = smp_processor_id();
-
-	oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
-	do {
-		if (oldstate != CPU_BROKEN)
-			newstate = CPU_DEAD;
-		else
-			newstate = CPU_DEAD_FROZEN;
-	} while (!atomic_try_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
-				     &oldstate, newstate));
-	return newstate == CPU_DEAD;
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
[patch V4 29/37] cpu/hotplug: Reset task stack state in _cpu_up()
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: David Woodhouse <dwmw@amazon.co.uk>

Commit dce1ca0525bf ("sched/scs: Reset task stack state in bringup_cpu()")
ensured that the shadow call stack and KASAN poisoning were removed from
a CPU's stack each time that CPU is brought up, not just once.

This is not incorrect. However, with parallel bringup the idle thread setup
will happen at a different step. As a consequence the cleanup in
bringup_cpu() would be too late.

Move the SCS/KASAN cleanup to the generic _cpu_up() function instead,
which already ensures that the new CPU's stack is available, purely to
allow for early failure. This occurs when the CPU to be brought up is
in the CPUHP_OFFLINE state, which should correctly do the cleanup any
time the CPU has been taken down to the point where such is needed.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
---
 kernel/cpu.c |   12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)


--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -771,12 +771,6 @@ static int bringup_cpu(unsigned int cpu)
 		return -EAGAIN;
 
 	/*
-	 * Reset stale stack state from the last time this CPU was online.
-	 */
-	scs_task_reset(idle);
-	kasan_unpoison_task_stack(idle);
-
-	/*
 	 * Some architectures have to walk the irq descriptors to
 	 * setup the vector space for the cpu which comes online.
 	 *
@@ -1587,6 +1581,12 @@ static int _cpu_up(unsigned int cpu, int
 			ret = PTR_ERR(idle);
 			goto out;
 		}
+
+		/*
+		 * Reset stale stack state from the last time this CPU was online.
+		 */
+		scs_task_reset(idle);
+		kasan_unpoison_task_stack(idle);
 	}
 
 	cpuhp_tasks_frozen = tasks_frozen;
[patch V4 30/37] cpu/hotplug: Provide a split up CPUHP_BRINGUP mechanism
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

The bring up logic of a to be onlined CPU consists of several parts, which
are considered to be a single hotplug state:

  1) Control CPU issues the wake-up

  2) To be onlined CPU starts up, does the minimal initialization,
     reports to be alive and waits for release into the complete bring-up.

  3) Control CPU waits for the alive report and releases the upcoming CPU
     for the complete bring-up.

Allow to split this into two states:

  1) Control CPU issues the wake-up

     After that the to be onlined CPU starts up, does the minimal
     initialization, reports to be alive and waits for release into the
     full bring-up. As this can run after the control CPU dropped the
     hotplug locks the code which is executed on the AP before it reports
     alive has to be carefully audited to not violate any of the hotplug
     constraints, especially not modifying any of the various cpumasks.

     This is really only meant to avoid waiting for the AP to react on the
     wake-up. Of course an architecture can move strict CPU related setup
     functionality, e.g. microcode loading, with care before the
     synchronization point to save further pointless waiting time.

  2) Control CPU waits for the alive report and releases the upcoming CPU
     for the complete bring-up.

This allows that the two states can be split up to run all to be onlined
CPUs up to state #1 on the control CPU and then at a later point run state
#2. This spares some of the latencies of the full serialized per CPU
bringup by avoiding the per CPU wakeup/wait serialization. The assumption
is that the first AP already waits when the last AP has been woken up. This
obviously depends on the hardware latencies and depending on the timings
this might still not completely eliminate all wait scenarios.

This split is just a preparatory step for enabling the parallel bringup
later. The boot time bringup is still fully serialized. It has a separate
config switch so that architectures which want to support parallel bringup
can test the split of the CPUHP_BRINGUP step separately.

To enable this the architecture must support the CPU hotplug core sync
mechanism and has to be audited that there are no implicit hotplug state
dependencies which require a fully serialized bringup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/Kconfig               |    4 ++
 include/linux/cpuhotplug.h |    4 ++
 kernel/cpu.c               |   70 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 76 insertions(+), 2 deletions(-)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -49,6 +49,10 @@ config HOTPLUG_CORE_SYNC_FULL
 	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select HOTPLUG_CORE_SYNC
 
+config HOTPLUG_SPLIT_STARTUP
+	bool
+	select HOTPLUG_CORE_SYNC_FULL
+
 config GENERIC_ENTRY
 	bool
 
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -133,6 +133,7 @@ enum cpuhp_state {
 	CPUHP_MIPS_SOC_PREPARE,
 	CPUHP_BP_PREPARE_DYN,
 	CPUHP_BP_PREPARE_DYN_END		= CPUHP_BP_PREPARE_DYN + 20,
+	CPUHP_BP_KICK_AP,
 	CPUHP_BRINGUP_CPU,
 
 	/*
@@ -517,9 +518,12 @@ void cpuhp_online_idle(enum cpuhp_state
 static inline void cpuhp_online_idle(enum cpuhp_state state) { }
 #endif
 
+struct task_struct;
+
 void cpuhp_ap_sync_alive(void);
 void arch_cpuhp_sync_state_poll(void);
 void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu);
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle);
 
 #ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
 void cpuhp_ap_report_dead(void);
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -761,6 +761,47 @@ static int bringup_wait_for_ap_online(un
 	return 0;
 }
 
+#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
+static int cpuhp_kick_ap_alive(unsigned int cpu)
+{
+	if (!cpuhp_can_boot_ap(cpu))
+		return -EAGAIN;
+
+	return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
+}
+
+static int cpuhp_bringup_ap(unsigned int cpu)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+	int ret;
+
+	/*
+	 * Some architectures have to walk the irq descriptors to
+	 * setup the vector space for the cpu which comes online.
+	 * Prevent irq alloc/free across the bringup.
+	 */
+	irq_lock_sparse();
+
+	ret = cpuhp_bp_sync_alive(cpu);
+	if (ret)
+		goto out_unlock;
+
+	ret = bringup_wait_for_ap_online(cpu);
+	if (ret)
+		goto out_unlock;
+
+	irq_unlock_sparse();
+
+	if (st->target <= CPUHP_AP_ONLINE_IDLE)
+		return 0;
+
+	return cpuhp_kick_ap(cpu, st, st->target);
+
+out_unlock:
+	irq_unlock_sparse();
+	return ret;
+}
+#else
 static int bringup_cpu(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@ -781,7 +822,6 @@ static int bringup_cpu(unsigned int cpu)
 	 */
 	irq_lock_sparse();
 
-	/* Arch-specific enabling code. */
 	ret = __cpu_up(cpu, idle);
 	if (ret)
 		goto out_unlock;
@@ -805,6 +845,7 @@ static int bringup_cpu(unsigned int cpu)
 	irq_unlock_sparse();
 	return ret;
 }
+#endif
 
 static int finish_cpu(unsigned int cpu)
 {
@@ -1944,13 +1985,38 @@ static struct cpuhp_step cpuhp_hp_states
 		.startup.single		= timers_prepare_cpu,
 		.teardown.single	= timers_dead_cpu,
 	},
-	/* Kicks the plugged cpu into life */
+
+#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
+	/*
+	 * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until
+	 * the next step will release it.
+	 */
+	[CPUHP_BP_KICK_AP] = {
+		.name			= "cpu:kick_ap",
+		.startup.single		= cpuhp_kick_ap_alive,
+	},
+
+	/*
+	 * Waits for the AP to reach cpuhp_ap_sync_alive() and then
+	 * releases it for the complete bringup.
+	 */
+	[CPUHP_BRINGUP_CPU] = {
+		.name			= "cpu:bringup",
+		.startup.single		= cpuhp_bringup_ap,
+		.teardown.single	= finish_cpu,
+		.cant_stop		= true,
+	},
+#else
+	/*
+	 * All-in-one CPU bringup state which includes the kick alive.
+	 */
 	[CPUHP_BRINGUP_CPU] = {
 		.name			= "cpu:bringup",
 		.startup.single		= bringup_cpu,
 		.teardown.single	= finish_cpu,
 		.cant_stop		= true,
 	},
+#endif
 	/* Final state before CPU kills itself */
 	[CPUHP_AP_IDLE_DEAD] = {
 		.name			= "idle:dead",
[patch V4 31/37] x86/smpboot: Enable split CPU startup
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

The x86 CPU bringup state currently does AP wake-up, wait for AP to
respond and then release it for full bringup.

It is safe to be split into a wake-up and a separate wait+release
state.

Provide the required functions and enable the split CPU bringup, which
prepares for parallel bringup, where the bringup of the non-boot CPUs takes
two iterations: One to prepare and wake all APs and the second to wait and
release them. Depending on timing this can eliminate the wait time
completely.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/Kconfig           |    2 +-
 arch/x86/include/asm/smp.h |    9 ++-------
 arch/x86/kernel/smp.c      |    2 +-
 arch/x86/kernel/smpboot.c  |    8 ++++----
 arch/x86/xen/smp_pv.c      |    4 ++--
 5 files changed, 10 insertions(+), 15 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -274,8 +274,8 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_USER_RETURN_NOTIFIER
 	select HAVE_GENERIC_VDSO
-	select HOTPLUG_CORE_SYNC_FULL		if SMP
 	select HOTPLUG_SMT			if SMP
+	select HOTPLUG_SPLIT_STARTUP		if SMP
 	select IRQ_FORCED_THREADING
 	select NEED_PER_CPU_EMBED_FIRST_CHUNK
 	select NEED_PER_CPU_PAGE_FIRST_CHUNK
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -40,7 +40,7 @@ struct smp_ops {
 
 	void (*cleanup_dead_cpu)(unsigned cpu);
 	void (*poll_sync_state)(void);
-	int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
+	int (*kick_ap_alive)(unsigned cpu, struct task_struct *tidle);
 	int (*cpu_disable)(void);
 	void (*cpu_die)(unsigned int cpu);
 	void (*play_dead)(void);
@@ -80,11 +80,6 @@ static inline void smp_cpus_done(unsigne
 	smp_ops.smp_cpus_done(max_cpus);
 }
 
-static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle)
-{
-	return smp_ops.cpu_up(cpu, tidle);
-}
-
 static inline int __cpu_disable(void)
 {
 	return smp_ops.cpu_disable();
@@ -124,7 +119,7 @@ void native_smp_prepare_cpus(unsigned in
 void calculate_max_logical_packages(void);
 void native_smp_cpus_done(unsigned int max_cpus);
 int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
-int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
+int native_kick_ap(unsigned int cpu, struct task_struct *tidle);
 int native_cpu_disable(void);
 void __noreturn hlt_play_dead(void);
 void native_play_dead(void);
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -268,7 +268,7 @@ struct smp_ops smp_ops = {
 #endif
 	.smp_send_reschedule	= native_smp_send_reschedule,
 
-	.cpu_up			= native_cpu_up,
+	.kick_ap_alive		= native_kick_ap,
 	.cpu_disable		= native_cpu_disable,
 	.play_dead		= native_play_dead,
 
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1052,7 +1052,7 @@ static int do_boot_cpu(int apicid, int c
 	return ret;
 }
 
-static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
+int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
 	int err;
@@ -1088,15 +1088,15 @@ static int native_kick_ap(unsigned int c
 	return err;
 }
 
-int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
 {
-	return native_kick_ap(cpu, tidle);
+	return smp_ops.kick_ap_alive(cpu, tidle);
 }
 
 void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu)
 {
 	/* Cleanup possible dangling ends... */
-	if (smp_ops.cpu_up == native_cpu_up && x86_platform.legacy.warm_reset)
+	if (smp_ops.kick_ap_alive == native_kick_ap && x86_platform.legacy.warm_reset)
 		smpboot_restore_warm_reset_vector();
 }
 
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -314,7 +314,7 @@ cpu_initialize_context(unsigned int cpu,
 	return 0;
 }
 
-static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
+static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle)
 {
 	int rc;
 
@@ -438,7 +438,7 @@ static const struct smp_ops xen_smp_ops
 	.smp_prepare_cpus = xen_pv_smp_prepare_cpus,
 	.smp_cpus_done = xen_smp_cpus_done,
 
-	.cpu_up = xen_pv_cpu_up,
+	.kick_ap_alive = xen_pv_kick_ap,
 	.cpu_die = xen_pv_cpu_die,
 	.cleanup_dead_cpu = xen_pv_cleanup_dead_cpu,
 	.poll_sync_state = xen_pv_poll_sync_state,
[patch V4 32/37] x86/apic: Provide cpu_primary_thread mask
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Make the primary thread tracking CPU mask based in preparation for simpler
handling of parallel bootup.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/include/asm/apic.h     |    2 --
 arch/x86/include/asm/topology.h |   19 +++++++++++++++----
 arch/x86/kernel/apic/apic.c     |   20 +++++++++-----------
 arch/x86/kernel/smpboot.c       |   12 +++---------
 4 files changed, 27 insertions(+), 26 deletions(-)
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -506,10 +506,8 @@ extern int default_check_phys_apicid_pre
 #endif /* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_SMP
-bool apic_id_is_primary_thread(unsigned int id);
 void apic_smt_update(void);
 #else
-static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
 static inline void apic_smt_update(void) { }
 #endif
 
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -31,9 +31,9 @@
  * CONFIG_NUMA.
  */
 #include <linux/numa.h>
+#include <linux/cpumask.h>
 
 #ifdef CONFIG_NUMA
-#include <linux/cpumask.h>
 
 #include <asm/mpspec.h>
 #include <asm/percpu.h>
@@ -139,9 +139,20 @@ static inline int topology_max_smt_threa
 int topology_update_package_map(unsigned int apicid, unsigned int cpu);
 int topology_update_die_map(unsigned int dieid, unsigned int cpu);
 int topology_phys_to_logical_pkg(unsigned int pkg);
-bool topology_is_primary_thread(unsigned int cpu);
 bool topology_smt_supported(void);
-#else
+
+extern struct cpumask __cpu_primary_thread_mask;
+#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
+
+/**
+ * topology_is_primary_thread - Check whether CPU is the primary SMT thread
+ * @cpu:	CPU to check
+ */
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+	return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
+}
+#else /* CONFIG_SMP */
 #define topology_max_packages()			(1)
 static inline int
 topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
@@ -152,7 +163,7 @@ static inline int topology_max_die_per_p
 static inline int topology_max_smt_threads(void) { return 1; }
 static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
 static inline bool topology_smt_supported(void) { return false; }
-#endif
+#endif /* !CONFIG_SMP */
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
 {
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2386,20 +2386,16 @@ bool arch_match_cpu_phys_id(int cpu, u64
 }
 
 #ifdef CONFIG_SMP
-/**
- * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
- * @apicid: APIC ID to check
- */
-bool apic_id_is_primary_thread(unsigned int apicid)
+static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
 {
-	u32 mask;
-
-	if (smp_num_siblings == 1)
-		return true;
 	/* Isolate the SMT bit(s) in the APICID and check for 0 */
-	mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
-	return !(apicid & mask);
+	u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
+
+	if (smp_num_siblings == 1 || !(apicid & mask))
+		cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
 }
+#else
+static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
 #endif
 
 /*
@@ -2544,6 +2540,8 @@ int generic_processor_info(int apicid, i
 	set_cpu_present(cpu, true);
 	num_processors++;
 
+	cpu_mark_primary_thread(cpu, apicid);
+
 	return cpu;
 }
 
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,6 +102,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
+/* CPUs which are the primary SMT threads */
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
 /* Representing CPUs for which sibling maps can be computed */
 static cpumask_var_t cpu_sibling_setup_mask;
 
@@ -277,15 +280,6 @@ static void notrace start_secondary(void
 }
 
 /**
- * topology_is_primary_thread - Check whether CPU is the primary SMT thread
- * @cpu:	CPU to check
- */
-bool topology_is_primary_thread(unsigned int cpu)
-{
-	return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu));
-}
-
-/**
  * topology_smt_supported - Check whether SMT is supported by the CPUs
  */
 bool topology_smt_supported(void)
[patch V4 33/37] cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

There is often significant latency in the early stages of CPU bringup, and
time is wasted by waking each CPU (e.g. with SIPI/INIT/INIT on x86) and
then waiting for it to respond before moving on to the next.

Allow a platform to enable parallel setup which brings all to be onlined
CPUs up to the CPUHP_BP_KICK_AP state. While this state advancement on the
control CPU (BP) is single-threaded the important part is the last state
CPUHP_BP_KICK_AP which wakes the to be onlined CPUs up.

This allows the CPUs to run up to the first synchronization point
cpuhp_ap_sync_alive() where they wait for the control CPU to release them
one by one for the full onlining procedure.

This parallelism depends on the CPU hotplug core sync mechanism which
ensures that the parallel brought up CPUs wait for release before touching
any state which would make the CPU visible to anything outside the hotplug
control mechanism.

To handle the SMT constraints of X86 correctly the bringup happens in two
iterations when CONFIG_HOTPLUG_SMT is enabled. The control CPU brings up
the primary SMT threads of each core first, which can load the microcode
without the need to rendezvous with the thread siblings. Once that's
completed it brings up the secondary SMT threads.

Co-developed-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
 Documentation/admin-guide/kernel-parameters.txt |    6 +
 arch/Kconfig                                    |    4 
 include/linux/cpuhotplug.h                      |    1 
 kernel/cpu.c                                    |  103 ++++++++++++++++++++++--
 4 files changed, 109 insertions(+), 5 deletions(-)
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -838,6 +838,12 @@
 			on every CPU online, such as boot, and resume from suspend.
 			Default: 10000
 
+	cpuhp.parallel=
+			[SMP] Enable/disable parallel bringup of secondary CPUs
+			Format: <bool>
+			Default is enabled if CONFIG_HOTPLUG_PARALLEL=y. Otherwise
+			the parameter has no effect.
+
 	crash_kexec_post_notifiers
 			Run kdump after running panic-notifiers and dumping
 			kmsg. This only for the users who doubt kdump always
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -53,6 +53,10 @@ config HOTPLUG_SPLIT_STARTUP
 	bool
 	select HOTPLUG_CORE_SYNC_FULL
 
+config HOTPLUG_PARALLEL
+	bool
+	select HOTPLUG_SPLIT_STARTUP
+
 config GENERIC_ENTRY
 	bool
 
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -524,6 +524,7 @@ void cpuhp_ap_sync_alive(void);
 void arch_cpuhp_sync_state_poll(void);
 void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu);
 int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle);
+bool arch_cpuhp_init_parallel_bringup(void);
 
 #ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
 void cpuhp_ap_report_dead(void);
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -649,8 +649,23 @@ bool cpu_smt_possible(void)
 		cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
 }
 EXPORT_SYMBOL_GPL(cpu_smt_possible);
+
+static inline bool cpuhp_smt_aware(void)
+{
+	return topology_smt_supported();
+}
+
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+	return cpu_primary_thread_mask;
+}
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+static inline bool cpuhp_smt_aware(void) { return false; }
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+	return cpu_present_mask;
+}
 #endif
 
 static inline enum cpuhp_state
@@ -1747,16 +1762,94 @@ int bringup_hibernate_cpu(unsigned int s
 	return 0;
 }
 
-void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
+static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus,
+				      enum cpuhp_state target)
 {
 	unsigned int cpu;
 
-	for_each_present_cpu(cpu) {
-		if (num_online_cpus() >= setup_max_cpus)
+	for_each_cpu(cpu, mask) {
+		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+		if (!--ncpus)
 			break;
-		if (!cpu_online(cpu))
-			cpu_up(cpu, CPUHP_ONLINE);
+
+		if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
+			/*
+			 * If this failed then cpu_up() might have only
+			 * rolled back to CPUHP_BP_KICK_AP for the final
+			 * online. Clean it up. NOOP if already rolled back.
+			 */
+			WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
+		}
+	}
+}
+
+#ifdef CONFIG_HOTPLUG_PARALLEL
+static bool __cpuhp_parallel_bringup __ro_after_init = true;
+
+static int __init parallel_bringup_parse_param(char *arg)
+{
+	return kstrtobool(arg, &__cpuhp_parallel_bringup);
+}
+early_param("cpuhp.parallel", parallel_bringup_parse_param);
+
+/*
+ * On architectures which have enabled parallel bringup this invokes all BP
+ * prepare states for each of the to be onlined APs first. The last state
+ * sends the startup IPI to the APs. The APs proceed through the low level
+ * bringup code in parallel and then wait for the control CPU to release
+ * them one by one for the final onlining procedure.
+ *
+ * This avoids waiting for each AP to respond to the startup IPI in
+ * CPUHP_BRINGUP_CPU.
+ */
+static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
+{
+	const struct cpumask *mask = cpu_present_mask;
+
+	if (__cpuhp_parallel_bringup)
+		__cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
+	if (!__cpuhp_parallel_bringup)
+		return false;
+
+	if (cpuhp_smt_aware()) {
+		const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
+		static struct cpumask tmp_mask __initdata;
+
+		/*
+		 * X86 requires to prevent that SMT siblings stopped while
+		 * the primary thread does a microcode update for various
+		 * reasons. Bring the primary threads up first.
+		 */
+		cpumask_and(&tmp_mask, mask, pmask);
+		cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP);
+		cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE);
+		/* Account for the online CPUs */
+		ncpus -= num_online_cpus();
+		if (!ncpus)
+			return true;
+		/* Create the mask for secondary CPUs */
+		cpumask_andnot(&tmp_mask, mask, pmask);
+		mask = &tmp_mask;
 	}
+
+	/* Bring the not-yet started CPUs up */
+	cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
+	cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE);
+	return true;
+}
+#else
+static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
+#endif /* CONFIG_HOTPLUG_PARALLEL */
+
+void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
+{
+	/* Try parallel bringup optimization if enabled */
+	if (cpuhp_bringup_cpus_parallel(setup_max_cpus))
+		return;
+
+	/* Full per CPU serialized bringup */
+	cpuhp_bringup_mask(cpu_present_mask, setup_max_cpus, CPUHP_ONLINE);
 }
 
 #ifdef CONFIG_PM_SLEEP_SMP
Re: [patch V4 33/37] cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE
Posted by Mark Brown 11 months, 2 weeks ago
On Fri, May 12, 2023 at 11:07:50PM +0200, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> There is often significant latency in the early stages of CPU bringup, and
> time is wasted by waking each CPU (e.g. with SIPI/INIT/INIT on x86) and
> then waiting for it to respond before moving on to the next.
> 
> Allow a platform to enable parallel setup which brings all to be onlined
> CPUs up to the CPUHP_BP_KICK_AP state. While this state advancement on the
> control CPU (BP) is single-threaded the important part is the last state
> CPUHP_BP_KICK_AP which wakes the to be onlined CPUs up.

We're seeing a regression on ThunderX2 systems with 256 CPUs with an
arm64 defconfig running -next which I've bisected to this patch.  Before
this commit we bring up 256 CPUs:

[   29.137225] GICv3: CPU254: found redistributor 11e03 region 1:0x0000000441f60000
[   29.137238] GICv3: CPU254: using allocated LPI pending table @0x00000008818e0000
[   29.137305] CPU254: Booted secondary processor 0x0000011e03 [0x431f0af1]
[   29.292421] Detected PIPT I-cache on CPU255
[   29.292635] GICv3: CPU255: found redistributor 11f03 region 1:0x0000000441fe0000
[   29.292648] GICv3: CPU255: using allocated LPI pending table @0x00000008818f0000
[   29.292715] CPU255: Booted secondary processor 0x0000011f03 [0x431f0af1]
[   29.292859] smp: Brought up 2 nodes, 256 CPUs
[   29.292864] SMP: Total of 256 processors activated.

but after we only bring up 255, missing the 256th:

[   29.165888] GICv3: CPU254: found redistributor 11e03 region 1:0x0000000441f60000
[   29.165901] GICv3: CPU254: using allocated LPI pending table @0x00000008818e0000
[   29.165968] CPU254: Booted secondary processor 0x0000011e03 [0x431f0af1]
[   29.166120] smp: Brought up 2 nodes, 255 CPUs
[   29.166125] SMP: Total of 255 processors activated.

I can't immediately see an issue with the patch itself, for systems
without CONFIG_HOTPLUG_PARALLEL=y it should replace the loop over
cpu_present_mask done by for_each_present_cpu() with an open coded one.
I didn't check the rest of the series yet.

The KernelCI bisection bot also isolated an issue on Odroid XU3 (a 32
bit arm system) with the final CPU of the 8 on the system not coming up
to the same patch:

  https://groups.io/g/kernelci-results/message/42480?p=%2C%2C%2C20%2C0%2C0%2C0%3A%3Acreated%2C0%2Call-cpus%2C20%2C2%2C0%2C99054444

Other boards I've checked (including some with multiple CPU clusters)
seem to be bringing up all their CPUs so it doesn't seem to just be
general breakage.

Log from my bisect:

git bisect start
# bad: [9f258af06b6268be8e960f63c3f66e88bdbbbdb0] Add linux-next specific files for 20230522
git bisect bad 9f258af06b6268be8e960f63c3f66e88bdbbbdb0
# good: [44c026a73be8038f03dbdeef028b642880cf1511] Linux 6.4-rc3
git bisect good 44c026a73be8038f03dbdeef028b642880cf1511
# good: [914db90ee0172753ab5298a48c63ac4f1fe089cf] Merge branch 'for-linux-next' of git://anongit.freedesktop.org/drm/drm-misc
git bisect good 914db90ee0172753ab5298a48c63ac4f1fe089cf
# good: [4624865b65777295cbe97cf1b98e6e49d81119d3] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git
git bisect good 4624865b65777295cbe97cf1b98e6e49d81119d3
# bad: [be7220c44fbc06825f7f122d06051630e1bf51e4] Merge branch 'for-next' of git://github.com/cminyard/linux-ipmi.git
git bisect bad be7220c44fbc06825f7f122d06051630e1bf51e4
# good: [cc677f7bec0da862a93d176524cdad5f416d58ef] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git
git bisect good cc677f7bec0da862a93d176524cdad5f416d58ef
# bad: [cdcc744aee1b886cbe4737798c0b8178b9ba5ae5] next-20230518/rcu
git bisect bad cdcc744aee1b886cbe4737798c0b8178b9ba5ae5
# bad: [8397dce1586a35af63fe9ea3e8fb3344758e55b5] Merge branch into tip/master: 'x86/mm'
git bisect bad 8397dce1586a35af63fe9ea3e8fb3344758e55b5
# bad: [0c7ffa32dbd6b09a87fea4ad1de8b27145dfd9a6] x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it
git bisect bad 0c7ffa32dbd6b09a87fea4ad1de8b27145dfd9a6
# good: [ab24eb9abb9c60c45119370731735b79ed79f36c] x86/xen/hvm: Get rid of DEAD_FROZEN handling
git bisect good ab24eb9abb9c60c45119370731735b79ed79f36c
# good: [72b11aa7f8f93449141544cecb21b2963416902d] riscv: Switch to hotplug core state synchronization
git bisect good 72b11aa7f8f93449141544cecb21b2963416902d
# good: [f54d4434c281f38b975d58de47adeca671beff4f] x86/apic: Provide cpu_primary_thread mask
git bisect good f54d4434c281f38b975d58de47adeca671beff4f
# bad: [bea629d57d006733d155bdb65ba4867788da69b6] x86/apic: Save the APIC virtual base address
git bisect bad bea629d57d006733d155bdb65ba4867788da69b6
# bad: [18415f33e2ac4ab382cbca8b5ff82a9036b5bd49] cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE
git bisect bad 18415f33e2ac4ab382cbca8b5ff82a9036b5bd49
# first bad commit: [18415f33e2ac4ab382cbca8b5ff82a9036b5bd49] cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE
Re: [patch V4 33/37] cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE
Posted by Thomas Gleixner 11 months, 2 weeks ago
On Mon, May 22 2023 at 20:45, Mark Brown wrote:
> On Fri, May 12, 2023 at 11:07:50PM +0200, Thomas Gleixner wrote:
>> From: Thomas Gleixner <tglx@linutronix.de>
>> 
>> There is often significant latency in the early stages of CPU bringup, and
>> time is wasted by waking each CPU (e.g. with SIPI/INIT/INIT on x86) and
>> then waiting for it to respond before moving on to the next.
>> 
>> Allow a platform to enable parallel setup which brings all to be onlined
>> CPUs up to the CPUHP_BP_KICK_AP state. While this state advancement on the
>> control CPU (BP) is single-threaded the important part is the last state
>> CPUHP_BP_KICK_AP which wakes the to be onlined CPUs up.
>
> We're seeing a regression on ThunderX2 systems with 256 CPUs with an
> arm64 defconfig running -next which I've bisected to this patch.  Before
> this commit we bring up 256 CPUs:
>
> [   29.137225] GICv3: CPU254: found redistributor 11e03 region 1:0x0000000441f60000
> [   29.137238] GICv3: CPU254: using allocated LPI pending table @0x00000008818e0000
> [   29.137305] CPU254: Booted secondary processor 0x0000011e03 [0x431f0af1]
> [   29.292421] Detected PIPT I-cache on CPU255
> [   29.292635] GICv3: CPU255: found redistributor 11f03 region 1:0x0000000441fe0000
> [   29.292648] GICv3: CPU255: using allocated LPI pending table @0x00000008818f0000
> [   29.292715] CPU255: Booted secondary processor 0x0000011f03 [0x431f0af1]
> [   29.292859] smp: Brought up 2 nodes, 256 CPUs
> [   29.292864] SMP: Total of 256 processors activated.
>
> but after we only bring up 255, missing the 256th:
>
> [   29.165888] GICv3: CPU254: found redistributor 11e03 region 1:0x0000000441f60000
> [   29.165901] GICv3: CPU254: using allocated LPI pending table @0x00000008818e0000
> [   29.165968] CPU254: Booted secondary processor 0x0000011e03 [0x431f0af1]
> [   29.166120] smp: Brought up 2 nodes, 255 CPUs
> [   29.166125] SMP: Total of 255 processors activated.
>
> I can't immediately see an issue with the patch itself, for systems
> without CONFIG_HOTPLUG_PARALLEL=y it should replace the loop over
> cpu_present_mask done by for_each_present_cpu() with an open coded one.
> I didn't check the rest of the series yet.
>
> The KernelCI bisection bot also isolated an issue on Odroid XU3 (a 32
> bit arm system) with the final CPU of the 8 on the system not coming up
> to the same patch:
>
>   https://groups.io/g/kernelci-results/message/42480?p=%2C%2C%2C20%2C0%2C0%2C0%3A%3Acreated%2C0%2Call-cpus%2C20%2C2%2C0%2C99054444
>
> Other boards I've checked (including some with multiple CPU clusters)
> seem to be bringing up all their CPUs so it doesn't seem to just be
> general breakage.

That does not make any sense at all and my tired brain does not help
either.

Can you please apply the below debug patch and provide the output?

Thanks,

        tglx
---
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 005f863a3d2b..90a9b2ae8391 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1767,13 +1767,20 @@ static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int n
 {
 	unsigned int cpu;
 
+	pr_info("Bringup max %u CPUs to %d\n", ncpus, target);
+
 	for_each_cpu(cpu, mask) {
 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+		int ret;
+
+		pr_info("Bringup CPU%u left %u\n", cpu, ncpus);
 
 		if (!--ncpus)
 			break;
 
-		if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
+		ret = cpu_up(cpu, target);
+		pr_info("Bringup CPU%u %d\n", cpu, ret);
+		if (ret && can_rollback_cpu(st)) {
 			/*
 			 * If this failed then cpu_up() might have only
 			 * rolled back to CPUHP_BP_KICK_AP for the final
Re: [patch V4 33/37] cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE
Posted by Mark Brown 11 months, 2 weeks ago
On Mon, May 22, 2023 at 11:04:17PM +0200, Thomas Gleixner wrote:

> That does not make any sense at all and my tired brain does not help
> either.

> Can you please apply the below debug patch and provide the output?

Here's the log, a quick glance says the 

	if (!--ncpus)
		break;

check is doing the wrong thing when CONFIG_NR_CPUS=256, as it is for
arm64 defconfig, and the system actually has 256 CPUs. The Odroid looks
like the same issue: the Exynos defconfig that fails there has
CONFIG_NR_CPUS=8, which is what the board has.

[    0.048542] smp: Bringing up secondary CPUs ...
[    0.048545] Bringup max 256 CPUs to 235
[    0.048547] Bringup CPU0 left 256
[    0.048575] Bringup CPU0 0
[    0.048577] Bringup CPU1 left 255
[    0.124561] Detected PIPT I-cache on CPU1
[    0.124586] GICv3: CPU1: found redistributor 100 region 0:0x0000000401080000
[    0.124595] GICv3: CPU1: using allocated LPI pending table @0x0000000880910000
[    0.124654] CPU1: Booted secondary processor 0x0000000100 [0x431f0af1]
[    0.124759] Bringup CPU1 0
[    0.124763] Bringup CPU2 left 254
[    0.195421] Detected PIPT I-cache on CPU2
[    0.195445] GICv3: CPU2: found redistributor 200 region 0:0x0000000401100000
[    0.195453] GICv3: CPU2: using allocated LPI pending table @0x0000000880920000
[    0.195510] CPU2: Booted secondary processor 0x0000000200 [0x431f0af1]
[    0.195611] Bringup CPU2 0
[    0.195615] Bringup CPU3 left 253
[    0.273859] Detected PIPT I-cache on CPU3
[    0.273885] GICv3: CPU3: found redistributor 300 region 0:0x0000000401180000
[    0.273893] GICv3: CPU3: using allocated LPI pending table @0x0000000880930000
[    0.273949] CPU3: Booted secondary processor 0x0000000300 [0x431f0af1]
[    0.274050] Bringup CPU3 0
[    0.274053] Bringup CPU4 left 252
[    0.351345] Detected PIPT I-cache on CPU4
[    0.351374] GICv3: CPU4: found redistributor 400 region 0:0x0000000401200000
[    0.351382] GICv3: CPU4: using allocated LPI pending table @0x0000000880940000
[    0.351438] CPU4: Booted secondary processor 0x0000000400 [0x431f0af1]
[    0.351540] Bringup CPU4 0
[    0.351543] Bringup CPU5 left 251
[    0.431068] Detected PIPT I-cache on CPU5
[    0.431099] GICv3: CPU5: found redistributor 500 region 0:0x0000000401280000
[    0.431107] GICv3: CPU5: using allocated LPI pending table @0x0000000880950000
[    0.431162] CPU5: Booted secondary processor 0x0000000500 [0x431f0af1]
[    0.431264] Bringup CPU5 0
[    0.431267] Bringup CPU6 left 250
[    0.503403] Detected PIPT I-cache on CPU6
[    0.503435] GICv3: CPU6: found redistributor 600 region 0:0x0000000401300000
[    0.503443] GICv3: CPU6: using allocated LPI pending table @0x0000000880960000
[    0.503498] CPU6: Booted secondary processor 0x0000000600 [0x431f0af1]
[    0.503600] Bringup CPU6 0
[    0.503604] Bringup CPU7 left 249
[    0.580128] Detected PIPT I-cache on CPU7
[    0.580162] GICv3: CPU7: found redistributor 700 region 0:0x0000000401380000
[    0.580171] GICv3: CPU7: using allocated LPI pending table @0x0000000880970000
[    0.580226] CPU7: Booted secondary processor 0x0000000700 [0x431f0af1]
[    0.580328] Bringup CPU7 0
[    0.580332] Bringup CPU8 left 248
[    0.660158] Detected PIPT I-cache on CPU8
[    0.660194] GICv3: CPU8: found redistributor 800 region 0:0x0000000401400000
[    0.660203] GICv3: CPU8: using allocated LPI pending table @0x0000000880980000
[    0.660258] CPU8: Booted secondary processor 0x0000000800 [0x431f0af1]
[    0.660359] Bringup CPU8 0
[    0.660363] Bringup CPU9 left 247
[    0.741063] Detected PIPT I-cache on CPU9
[    0.741102] GICv3: CPU9: found redistributor 900 region 0:0x0000000401480000
[    0.741110] GICv3: CPU9: using allocated LPI pending table @0x0000000880990000
[    0.741166] CPU9: Booted secondary processor 0x0000000900 [0x431f0af1]
[    0.741268] Bringup CPU9 0
[    0.741272] Bringup CPU10 left 246
[    0.817643] Detected PIPT I-cache on CPU10
[    0.817684] GICv3: CPU10: found redistributor a00 region 0:0x0000000401500000
[    0.817692] GICv3: CPU10: using allocated LPI pending table @0x00000008809a0000
[    0.817747] CPU10: Booted secondary processor 0x0000000a00 [0x431f0af1]
[    0.817850] Bringup CPU10 0
[    0.817854] Bringup CPU11 left 245
[    0.896094] Detected PIPT I-cache on CPU11
[    0.896137] GICv3: CPU11: found redistributor b00 region 0:0x0000000401580000
[    0.896145] GICv3: CPU11: using allocated LPI pending table @0x00000008809b0000
[    0.896202] CPU11: Booted secondary processor 0x0000000b00 [0x431f0af1]
[    0.896304] Bringup CPU11 0
[    0.896308] Bringup CPU12 left 244
[    0.976966] Detected PIPT I-cache on CPU12
[    0.977010] GICv3: CPU12: found redistributor c00 region 0:0x0000000401600000
[    0.977018] GICv3: CPU12: using allocated LPI pending table @0x00000008809c0000
[    0.977074] CPU12: Booted secondary processor 0x0000000c00 [0x431f0af1]
[    0.977179] Bringup CPU12 0
[    0.977183] Bringup CPU13 left 243
[    1.053939] Detected PIPT I-cache on CPU13
[    1.053985] GICv3: CPU13: found redistributor d00 region 0:0x0000000401680000
[    1.053994] GICv3: CPU13: using allocated LPI pending table @0x00000008809d0000
[    1.054050] CPU13: Booted secondary processor 0x0000000d00 [0x431f0af1]
[    1.054169] Bringup CPU13 0
[    1.054172] Bringup CPU14 left 242
[    1.133133] Detected PIPT I-cache on CPU14
[    1.133182] GICv3: CPU14: found redistributor e00 region 0:0x0000000401700000
[    1.133190] GICv3: CPU14: using allocated LPI pending table @0x00000008809e0000
[    1.133248] CPU14: Booted secondary processor 0x0000000e00 [0x431f0af1]
[    1.133352] Bringup CPU14 0
[    1.133356] Bringup CPU15 left 241
[    1.214963] Detected PIPT I-cache on CPU15
[    1.215013] GICv3: CPU15: found redistributor f00 region 0:0x0000000401780000
[    1.215022] GICv3: CPU15: using allocated LPI pending table @0x00000008809f0000
[    1.215078] CPU15: Booted secondary processor 0x0000000f00 [0x431f0af1]
[    1.215179] Bringup CPU15 0
[    1.215183] Bringup CPU16 left 240
[    1.292692] Detected PIPT I-cache on CPU16
[    1.292746] GICv3: CPU16: found redistributor 1000 region 0:0x0000000401800000
[    1.292755] GICv3: CPU16: using allocated LPI pending table @0x0000000880a00000
[    1.292810] CPU16: Booted secondary processor 0x0000001000 [0x431f0af1]
[    1.292914] Bringup CPU16 0
[    1.292918] Bringup CPU17 left 239
[    1.371926] Detected PIPT I-cache on CPU17
[    1.371982] GICv3: CPU17: found redistributor 1100 region 0:0x0000000401880000
[    1.371991] GICv3: CPU17: using allocated LPI pending table @0x0000000880a10000
[    1.372047] CPU17: Booted secondary processor 0x0000001100 [0x431f0af1]
[    1.372153] Bringup CPU17 0
[    1.372156] Bringup CPU18 left 238
[    1.454059] Detected PIPT I-cache on CPU18
[    1.454117] GICv3: CPU18: found redistributor 1200 region 0:0x0000000401900000
[    1.454126] GICv3: CPU18: using allocated LPI pending table @0x0000000880a20000
[    1.454182] CPU18: Booted secondary processor 0x0000001200 [0x431f0af1]
[    1.454287] Bringup CPU18 0
[    1.454290] Bringup CPU19 left 237
[    1.530831] Detected PIPT I-cache on CPU19
[    1.530891] GICv3: CPU19: found redistributor 1300 region 0:0x0000000401980000
[    1.530900] GICv3: CPU19: using allocated LPI pending table @0x0000000880a30000
[    1.530956] CPU19: Booted secondary processor 0x0000001300 [0x431f0af1]
[    1.531061] Bringup CPU19 0
[    1.531065] Bringup CPU20 left 236
[    1.609799] Detected PIPT I-cache on CPU20
[    1.609860] GICv3: CPU20: found redistributor 1400 region 0:0x0000000401a00000
[    1.609869] GICv3: CPU20: using allocated LPI pending table @0x0000000880a40000
[    1.609927] CPU20: Booted secondary processor 0x0000001400 [0x431f0af1]
[    1.610032] Bringup CPU20 0
[    1.610036] Bringup CPU21 left 235
[    1.692060] Detected PIPT I-cache on CPU21
[    1.692123] GICv3: CPU21: found redistributor 1500 region 0:0x0000000401a80000
[    1.692132] GICv3: CPU21: using allocated LPI pending table @0x0000000880a50000
[    1.692188] CPU21: Booted secondary processor 0x0000001500 [0x431f0af1]
[    1.692296] Bringup CPU21 0
[    1.692299] Bringup CPU22 left 234
[    1.768877] Detected PIPT I-cache on CPU22
[    1.768942] GICv3: CPU22: found redistributor 1600 region 0:0x0000000401b00000
[    1.768951] GICv3: CPU22: using allocated LPI pending table @0x0000000880a60000
[    1.769008] CPU22: Booted secondary processor 0x0000001600 [0x431f0af1]
[    1.769115] Bringup CPU22 0
[    1.769119] Bringup CPU23 left 233
[    1.847956] Detected PIPT I-cache on CPU23
[    1.848023] GICv3: CPU23: found redistributor 1700 region 0:0x0000000401b80000
[    1.848032] GICv3: CPU23: using allocated LPI pending table @0x0000000880a70000
[    1.848088] CPU23: Booted secondary processor 0x0000001700 [0x431f0af1]
[    1.848196] Bringup CPU23 0
[    1.848199] Bringup CPU24 left 232
[    1.920945] Detected PIPT I-cache on CPU24
[    1.921015] GICv3: CPU24: found redistributor 1800 region 0:0x0000000401c00000
[    1.921024] GICv3: CPU24: using allocated LPI pending table @0x0000000880a80000
[    1.921081] CPU24: Booted secondary processor 0x0000001800 [0x431f0af1]
[    1.921187] Bringup CPU24 0
[    1.921190] Bringup CPU25 left 231
[    1.997836] Detected PIPT I-cache on CPU25
[    1.997908] GICv3: CPU25: found redistributor 1900 region 0:0x0000000401c80000
[    1.997917] GICv3: CPU25: using allocated LPI pending table @0x0000000880a90000
[    1.997973] CPU25: Booted secondary processor 0x0000001900 [0x431f0af1]
[    1.998080] Bringup CPU25 0
[    1.998084] Bringup CPU26 left 230
[    2.071111] Detected PIPT I-cache on CPU26
[    2.071185] GICv3: CPU26: found redistributor 1a00 region 0:0x0000000401d00000
[    2.071194] GICv3: CPU26: using allocated LPI pending table @0x0000000880aa0000
[    2.071250] CPU26: Booted secondary processor 0x0000001a00 [0x431f0af1]
[    2.071358] Bringup CPU26 0
[    2.071362] Bringup CPU27 left 229
[    2.147890] Detected PIPT I-cache on CPU27
[    2.147966] GICv3: CPU27: found redistributor 1b00 region 0:0x0000000401d80000
[    2.147975] GICv3: CPU27: using allocated LPI pending table @0x0000000880ab0000
[    2.148032] CPU27: Booted secondary processor 0x0000001b00 [0x431f0af1]
[    2.148138] Bringup CPU27 0
[    2.148142] Bringup CPU28 left 228
[    2.230081] Detected PIPT I-cache on CPU28
[    2.230159] GICv3: CPU28: found redistributor 1c00 region 0:0x0000000401e00000
[    2.230168] GICv3: CPU28: using allocated LPI pending table @0x0000000880ac0000
[    2.230224] CPU28: Booted secondary processor 0x0000001c00 [0x431f0af1]
[    2.230343] Bringup CPU28 0
[    2.230347] Bringup CPU29 left 227
[    2.311933] Detected PIPT I-cache on CPU29
[    2.312012] GICv3: CPU29: found redistributor 1d00 region 0:0x0000000401e80000
[    2.312021] GICv3: CPU29: using allocated LPI pending table @0x0000000880ad0000
[    2.312077] CPU29: Booted secondary processor 0x0000001d00 [0x431f0af1]
[    2.312183] Bringup CPU29 0
[    2.312187] Bringup CPU30 left 226
[    2.392181] Detected PIPT I-cache on CPU30
[    2.392263] GICv3: CPU30: found redistributor 1e00 region 0:0x0000000401f00000
[    2.392272] GICv3: CPU30: using allocated LPI pending table @0x0000000880ae0000
[    2.392328] CPU30: Booted secondary processor 0x0000001e00 [0x431f0af1]
[    2.392435] Bringup CPU30 0
[    2.392439] Bringup CPU31 left 225
[    2.466927] Detected PIPT I-cache on CPU31
[    2.467010] GICv3: CPU31: found redistributor 1f00 region 0:0x0000000401f80000
[    2.467020] GICv3: CPU31: using allocated LPI pending table @0x0000000880af0000
[    2.467076] CPU31: Booted secondary processor 0x0000001f00 [0x431f0af1]
[    2.467184] Bringup CPU31 0
[    2.467188] Bringup CPU32 left 224
[    2.535705] Detected PIPT I-cache on CPU32
[    2.535734] GICv3: CPU32: found redistributor 1 region 0:0x0000000401020000
[    2.535743] GICv3: CPU32: using allocated LPI pending table @0x0000000880b00000
[    2.535798] CPU32: Booted secondary processor 0x0000000001 [0x431f0af1]
[    2.535910] Bringup CPU32 0
[    2.535914] Bringup CPU33 left 223
[    2.604057] Detected PIPT I-cache on CPU33
[    2.604084] GICv3: CPU33: found redistributor 101 region 0:0x00000004010a0000
[    2.604092] GICv3: CPU33: using allocated LPI pending table @0x0000000880b10000
[    2.604145] CPU33: Booted secondary processor 0x0000000101 [0x431f0af1]
[    2.604235] Bringup CPU33 0
[    2.604237] Bringup CPU34 left 222
[    2.672660] Detected PIPT I-cache on CPU34
[    2.672691] GICv3: CPU34: found redistributor 201 region 0:0x0000000401120000
[    2.672699] GICv3: CPU34: using allocated LPI pending table @0x0000000880b20000
[    2.672753] CPU34: Booted secondary processor 0x0000000201 [0x431f0af1]
[    2.672843] Bringup CPU34 0
[    2.672846] Bringup CPU35 left 221
[    2.740986] Detected PIPT I-cache on CPU35
[    2.741018] GICv3: CPU35: found redistributor 301 region 0:0x00000004011a0000
[    2.741027] GICv3: CPU35: using allocated LPI pending table @0x0000000880b30000
[    2.741080] CPU35: Booted secondary processor 0x0000000301 [0x431f0af1]
[    2.741169] Bringup CPU35 0
[    2.741172] Bringup CPU36 left 220
[    2.809469] Detected PIPT I-cache on CPU36
[    2.809503] GICv3: CPU36: found redistributor 401 region 0:0x0000000401220000
[    2.809511] GICv3: CPU36: using allocated LPI pending table @0x0000000880b40000
[    2.809564] CPU36: Booted secondary processor 0x0000000401 [0x431f0af1]
[    2.809654] Bringup CPU36 0
[    2.809657] Bringup CPU37 left 219
[    2.878071] Detected PIPT I-cache on CPU37
[    2.878108] GICv3: CPU37: found redistributor 501 region 0:0x00000004012a0000
[    2.878116] GICv3: CPU37: using allocated LPI pending table @0x0000000880b50000
[    2.878169] CPU37: Booted secondary processor 0x0000000501 [0x431f0af1]
[    2.878259] Bringup CPU37 0
[    2.878262] Bringup CPU38 left 218
[    2.946487] Detected PIPT I-cache on CPU38
[    2.946525] GICv3: CPU38: found redistributor 601 region 0:0x0000000401320000
[    2.946533] GICv3: CPU38: using allocated LPI pending table @0x0000000880b60000
[    2.946587] CPU38: Booted secondary processor 0x0000000601 [0x431f0af1]
[    2.946677] Bringup CPU38 0
[    2.946680] Bringup CPU39 left 217
[    3.014963] Detected PIPT I-cache on CPU39
[    3.015003] GICv3: CPU39: found redistributor 701 region 0:0x00000004013a0000
[    3.015012] GICv3: CPU39: using allocated LPI pending table @0x0000000880b70000
[    3.015064] CPU39: Booted secondary processor 0x0000000701 [0x431f0af1]
[    3.015158] Bringup CPU39 0
[    3.015161] Bringup CPU40 left 216
[    3.083400] Detected PIPT I-cache on CPU40
[    3.083443] GICv3: CPU40: found redistributor 801 region 0:0x0000000401420000
[    3.083451] GICv3: CPU40: using allocated LPI pending table @0x0000000880b80000
[    3.083504] CPU40: Booted secondary processor 0x0000000801 [0x431f0af1]
[    3.083598] Bringup CPU40 0
[    3.083601] Bringup CPU41 left 215
[    3.152018] Detected PIPT I-cache on CPU41
[    3.152063] GICv3: CPU41: found redistributor 901 region 0:0x00000004014a0000
[    3.152071] GICv3: CPU41: using allocated LPI pending table @0x0000000880b90000
[    3.152123] CPU41: Booted secondary processor 0x0000000901 [0x431f0af1]
[    3.152211] Bringup CPU41 0
[    3.152214] Bringup CPU42 left 214
[    3.220435] Detected PIPT I-cache on CPU42
[    3.220481] GICv3: CPU42: found redistributor a01 region 0:0x0000000401520000
[    3.220489] GICv3: CPU42: using allocated LPI pending table @0x0000000880ba0000
[    3.220542] CPU42: Booted secondary processor 0x0000000a01 [0x431f0af1]
[    3.220634] Bringup CPU42 0
[    3.220637] Bringup CPU43 left 213
[    3.288786] Detected PIPT I-cache on CPU43
[    3.288833] GICv3: CPU43: found redistributor b01 region 0:0x00000004015a0000
[    3.288842] GICv3: CPU43: using allocated LPI pending table @0x0000000880bb0000
[    3.288895] CPU43: Booted secondary processor 0x0000000b01 [0x431f0af1]
[    3.288985] Bringup CPU43 0
[    3.288988] Bringup CPU44 left 212
[    3.357271] Detected PIPT I-cache on CPU44
[    3.357321] GICv3: CPU44: found redistributor c01 region 0:0x0000000401620000
[    3.357329] GICv3: CPU44: using allocated LPI pending table @0x0000000880bc0000
[    3.357382] CPU44: Booted secondary processor 0x0000000c01 [0x431f0af1]
[    3.357475] Bringup CPU44 0
[    3.357478] Bringup CPU45 left 211
[    3.426136] Detected PIPT I-cache on CPU45
[    3.426188] GICv3: CPU45: found redistributor d01 region 0:0x00000004016a0000
[    3.426196] GICv3: CPU45: using allocated LPI pending table @0x0000000880bd0000
[    3.426249] CPU45: Booted secondary processor 0x0000000d01 [0x431f0af1]
[    3.426342] Bringup CPU45 0
[    3.426345] Bringup CPU46 left 210
[    3.494971] Detected PIPT I-cache on CPU46
[    3.495026] GICv3: CPU46: found redistributor e01 region 0:0x0000000401720000
[    3.495034] GICv3: CPU46: using allocated LPI pending table @0x0000000880be0000
[    3.495087] CPU46: Booted secondary processor 0x0000000e01 [0x431f0af1]
[    3.495178] Bringup CPU46 0
[    3.495181] Bringup CPU47 left 209
[    3.563244] Detected PIPT I-cache on CPU47
[    3.563300] GICv3: CPU47: found redistributor f01 region 0:0x00000004017a0000
[    3.563308] GICv3: CPU47: using allocated LPI pending table @0x0000000880bf0000
[    3.563362] CPU47: Booted secondary processor 0x0000000f01 [0x431f0af1]
[    3.563453] Bringup CPU47 0
[    3.563455] Bringup CPU48 left 208
[    3.631916] Detected PIPT I-cache on CPU48
[    3.631975] GICv3: CPU48: found redistributor 1001 region 0:0x0000000401820000
[    3.631984] GICv3: CPU48: using allocated LPI pending table @0x0000000880c00000
[    3.632036] CPU48: Booted secondary processor 0x0000001001 [0x431f0af1]
[    3.632129] Bringup CPU48 0
[    3.632132] Bringup CPU49 left 207
[    3.700602] Detected PIPT I-cache on CPU49
[    3.700664] GICv3: CPU49: found redistributor 1101 region 0:0x00000004018a0000
[    3.700672] GICv3: CPU49: using allocated LPI pending table @0x0000000880c10000
[    3.700725] CPU49: Booted secondary processor 0x0000001101 [0x431f0af1]
[    3.700817] Bringup CPU49 0
[    3.700820] Bringup CPU50 left 206
[    3.769387] Detected PIPT I-cache on CPU50
[    3.769450] GICv3: CPU50: found redistributor 1201 region 0:0x0000000401920000
[    3.769458] GICv3: CPU50: using allocated LPI pending table @0x0000000880c20000
[    3.769512] CPU50: Booted secondary processor 0x0000001201 [0x431f0af1]
[    3.769605] Bringup CPU50 0
[    3.769607] Bringup CPU51 left 205
[    3.838160] Detected PIPT I-cache on CPU51
[    3.838225] GICv3: CPU51: found redistributor 1301 region 0:0x00000004019a0000
[    3.838233] GICv3: CPU51: using allocated LPI pending table @0x0000000880c30000
[    3.838286] CPU51: Booted secondary processor 0x0000001301 [0x431f0af1]
[    3.838379] Bringup CPU51 0
[    3.838381] Bringup CPU52 left 204
[    3.906682] Detected PIPT I-cache on CPU52
[    3.906749] GICv3: CPU52: found redistributor 1401 region 0:0x0000000401a20000
[    3.906757] GICv3: CPU52: using allocated LPI pending table @0x0000000880c40000
[    3.906810] CPU52: Booted secondary processor 0x0000001401 [0x431f0af1]
[    3.906905] Bringup CPU52 0
[    3.906907] Bringup CPU53 left 203
[    3.975408] Detected PIPT I-cache on CPU53
[    3.975477] GICv3: CPU53: found redistributor 1501 region 0:0x0000000401aa0000
[    3.975485] GICv3: CPU53: using allocated LPI pending table @0x0000000880c50000
[    3.975538] CPU53: Booted secondary processor 0x0000001501 [0x431f0af1]
[    3.975631] Bringup CPU53 0
[    3.975633] Bringup CPU54 left 202
[    4.044084] Detected PIPT I-cache on CPU54
[    4.044154] GICv3: CPU54: found redistributor 1601 region 0:0x0000000401b20000
[    4.044162] GICv3: CPU54: using allocated LPI pending table @0x0000000880c60000
[    4.044216] CPU54: Booted secondary processor 0x0000001601 [0x431f0af1]
[    4.044309] Bringup CPU54 0
[    4.044312] Bringup CPU55 left 201
[    4.112725] Detected PIPT I-cache on CPU55
[    4.112797] GICv3: CPU55: found redistributor 1701 region 0:0x0000000401ba0000
[    4.112805] GICv3: CPU55: using allocated LPI pending table @0x0000000880c70000
[    4.112858] CPU55: Booted secondary processor 0x0000001701 [0x431f0af1]
[    4.112952] Bringup CPU55 0
[    4.112954] Bringup CPU56 left 200
[    4.181726] Detected PIPT I-cache on CPU56
[    4.181801] GICv3: CPU56: found redistributor 1801 region 0:0x0000000401c20000
[    4.181809] GICv3: CPU56: using allocated LPI pending table @0x0000000880c80000
[    4.181863] CPU56: Booted secondary processor 0x0000001801 [0x431f0af1]
[    4.181956] Bringup CPU56 0
[    4.181959] Bringup CPU57 left 199
[    4.250209] Detected PIPT I-cache on CPU57
[    4.250285] GICv3: CPU57: found redistributor 1901 region 0:0x0000000401ca0000
[    4.250293] GICv3: CPU57: using allocated LPI pending table @0x0000000880c90000
[    4.250347] CPU57: Booted secondary processor 0x0000001901 [0x431f0af1]
[    4.250441] Bringup CPU57 0
[    4.250443] Bringup CPU58 left 198
[    4.319062] Detected PIPT I-cache on CPU58
[    4.319141] GICv3: CPU58: found redistributor 1a01 region 0:0x0000000401d20000
[    4.319149] GICv3: CPU58: using allocated LPI pending table @0x0000000880ca0000
[    4.319203] CPU58: Booted secondary processor 0x0000001a01 [0x431f0af1]
[    4.319298] Bringup CPU58 0
[    4.319300] Bringup CPU59 left 197
[    4.387907] Detected PIPT I-cache on CPU59
[    4.387988] GICv3: CPU59: found redistributor 1b01 region 0:0x0000000401da0000
[    4.387996] GICv3: CPU59: using allocated LPI pending table @0x0000000880cb0000
[    4.388050] CPU59: Booted secondary processor 0x0000001b01 [0x431f0af1]
[    4.388143] Bringup CPU59 0
[    4.388146] Bringup CPU60 left 196
[    4.456533] Detected PIPT I-cache on CPU60
[    4.456615] GICv3: CPU60: found redistributor 1c01 region 0:0x0000000401e20000
[    4.456624] GICv3: CPU60: using allocated LPI pending table @0x0000000880cc0000
[    4.456679] CPU60: Booted secondary processor 0x0000001c01 [0x431f0af1]
[    4.456792] Bringup CPU60 0
[    4.456795] Bringup CPU61 left 195
[    4.525028] Detected PIPT I-cache on CPU61
[    4.525113] GICv3: CPU61: found redistributor 1d01 region 0:0x0000000401ea0000
[    4.525121] GICv3: CPU61: using allocated LPI pending table @0x0000000880cd0000
[    4.525174] CPU61: Booted secondary processor 0x0000001d01 [0x431f0af1]
[    4.525269] Bringup CPU61 0
[    4.525272] Bringup CPU62 left 194
[    4.593720] Detected PIPT I-cache on CPU62
[    4.593808] GICv3: CPU62: found redistributor 1e01 region 0:0x0000000401f20000
[    4.593816] GICv3: CPU62: using allocated LPI pending table @0x0000000880ce0000
[    4.593868] CPU62: Booted secondary processor 0x0000001e01 [0x431f0af1]
[    4.593962] Bringup CPU62 0
[    4.593965] Bringup CPU63 left 193
[    4.662153] Detected PIPT I-cache on CPU63
[    4.662241] GICv3: CPU63: found redistributor 1f01 region 0:0x0000000401fa0000
[    4.662249] GICv3: CPU63: using allocated LPI pending table @0x0000000880cf0000
[    4.662301] CPU63: Booted secondary processor 0x0000001f01 [0x431f0af1]
[    4.662399] Bringup CPU63 0
[    4.662402] Bringup CPU64 left 192
[    4.731197] Detected PIPT I-cache on CPU64
[    4.731234] GICv3: CPU64: found redistributor 2 region 0:0x0000000401040000
[    4.731243] GICv3: CPU64: using allocated LPI pending table @0x0000000880d00000
[    4.731296] CPU64: Booted secondary processor 0x0000000002 [0x431f0af1]
[    4.731396] Bringup CPU64 0
[    4.731399] Bringup CPU65 left 191
[    4.799636] Detected PIPT I-cache on CPU65
[    4.799671] GICv3: CPU65: found redistributor 102 region 0:0x00000004010c0000
[    4.799679] GICv3: CPU65: using allocated LPI pending table @0x0000000880d10000
[    4.799731] CPU65: Booted secondary processor 0x0000000102 [0x431f0af1]
[    4.799822] Bringup CPU65 0
[    4.799825] Bringup CPU66 left 190
[    4.868445] Detected PIPT I-cache on CPU66
[    4.868486] GICv3: CPU66: found redistributor 202 region 0:0x0000000401140000
[    4.868494] GICv3: CPU66: using allocated LPI pending table @0x0000000880d20000
[    4.868546] CPU66: Booted secondary processor 0x0000000202 [0x431f0af1]
[    4.868638] Bringup CPU66 0
[    4.868640] Bringup CPU67 left 189
[    4.937078] Detected PIPT I-cache on CPU67
[    4.937120] GICv3: CPU67: found redistributor 302 region 0:0x00000004011c0000
[    4.937128] GICv3: CPU67: using allocated LPI pending table @0x0000000880d30000
[    4.937180] CPU67: Booted secondary processor 0x0000000302 [0x431f0af1]
[    4.937276] Bringup CPU67 0
[    4.937278] Bringup CPU68 left 188
[    5.005580] Detected PIPT I-cache on CPU68
[    5.005623] GICv3: CPU68: found redistributor 402 region 0:0x0000000401240000
[    5.005631] GICv3: CPU68: using allocated LPI pending table @0x0000000880d40000
[    5.005683] CPU68: Booted secondary processor 0x0000000402 [0x431f0af1]
[    5.005776] Bringup CPU68 0
[    5.005779] Bringup CPU69 left 187
[    5.074306] Detected PIPT I-cache on CPU69
[    5.074352] GICv3: CPU69: found redistributor 502 region 0:0x00000004012c0000
[    5.074360] GICv3: CPU69: using allocated LPI pending table @0x0000000880d50000
[    5.074412] CPU69: Booted secondary processor 0x0000000502 [0x431f0af1]
[    5.074506] Bringup CPU69 0
[    5.074508] Bringup CPU70 left 186
[    5.142762] Detected PIPT I-cache on CPU70
[    5.142810] GICv3: CPU70: found redistributor 602 region 0:0x0000000401340000
[    5.142818] GICv3: CPU70: using allocated LPI pending table @0x0000000880d60000
[    5.142870] CPU70: Booted secondary processor 0x0000000602 [0x431f0af1]
[    5.142964] Bringup CPU70 0
[    5.142967] Bringup CPU71 left 185
[    5.211351] Detected PIPT I-cache on CPU71
[    5.211400] GICv3: CPU71: found redistributor 702 region 0:0x00000004013c0000
[    5.211408] GICv3: CPU71: using allocated LPI pending table @0x0000000880d70000
[    5.211460] CPU71: Booted secondary processor 0x0000000702 [0x431f0af1]
[    5.211552] Bringup CPU71 0
[    5.211555] Bringup CPU72 left 184
[    5.279995] Detected PIPT I-cache on CPU72
[    5.280048] GICv3: CPU72: found redistributor 802 region 0:0x0000000401440000
[    5.280056] GICv3: CPU72: using allocated LPI pending table @0x0000000880d80000
[    5.280109] CPU72: Booted secondary processor 0x0000000802 [0x431f0af1]
[    5.280201] Bringup CPU72 0
[    5.280204] Bringup CPU73 left 183
[    5.348712] Detected PIPT I-cache on CPU73
[    5.348766] GICv3: CPU73: found redistributor 902 region 0:0x00000004014c0000
[    5.348774] GICv3: CPU73: using allocated LPI pending table @0x0000000880d90000
[    5.348826] CPU73: Booted secondary processor 0x0000000902 [0x431f0af1]
[    5.348920] Bringup CPU73 0
[    5.348923] Bringup CPU74 left 182
[    5.417266] Detected PIPT I-cache on CPU74
[    5.417321] GICv3: CPU74: found redistributor a02 region 0:0x0000000401540000
[    5.417329] GICv3: CPU74: using allocated LPI pending table @0x0000000880da0000
[    5.417381] CPU74: Booted secondary processor 0x0000000a02 [0x431f0af1]
[    5.417477] Bringup CPU74 0
[    5.417480] Bringup CPU75 left 181
[    5.485736] Detected PIPT I-cache on CPU75
[    5.485793] GICv3: CPU75: found redistributor b02 region 0:0x00000004015c0000
[    5.485801] GICv3: CPU75: using allocated LPI pending table @0x0000000880db0000
[    5.485854] CPU75: Booted secondary processor 0x0000000b02 [0x431f0af1]
[    5.485972] Bringup CPU75 0
[    5.485975] Bringup CPU76 left 180
[    5.554355] Detected PIPT I-cache on CPU76
[    5.554414] GICv3: CPU76: found redistributor c02 region 0:0x0000000401640000
[    5.554422] GICv3: CPU76: using allocated LPI pending table @0x0000000880dc0000
[    5.554475] CPU76: Booted secondary processor 0x0000000c02 [0x431f0af1]
[    5.554569] Bringup CPU76 0
[    5.554572] Bringup CPU77 left 179
[    5.623035] Detected PIPT I-cache on CPU77
[    5.623097] GICv3: CPU77: found redistributor d02 region 0:0x00000004016c0000
[    5.623105] GICv3: CPU77: using allocated LPI pending table @0x0000000880dd0000
[    5.623157] CPU77: Booted secondary processor 0x0000000d02 [0x431f0af1]
[    5.623249] Bringup CPU77 0
[    5.623252] Bringup CPU78 left 178
[    5.691609] Detected PIPT I-cache on CPU78
[    5.691674] GICv3: CPU78: found redistributor e02 region 0:0x0000000401740000
[    5.691683] GICv3: CPU78: using allocated LPI pending table @0x0000000880de0000
[    5.691735] CPU78: Booted secondary processor 0x0000000e02 [0x431f0af1]
[    5.691829] Bringup CPU78 0
[    5.691832] Bringup CPU79 left 177
[    5.760010] Detected PIPT I-cache on CPU79
[    5.760075] GICv3: CPU79: found redistributor f02 region 0:0x00000004017c0000
[    5.760083] GICv3: CPU79: using allocated LPI pending table @0x0000000880df0000
[    5.760136] CPU79: Booted secondary processor 0x0000000f02 [0x431f0af1]
[    5.760230] Bringup CPU79 0
[    5.760232] Bringup CPU80 left 176
[    5.828861] Detected PIPT I-cache on CPU80
[    5.828929] GICv3: CPU80: found redistributor 1002 region 0:0x0000000401840000
[    5.828938] GICv3: CPU80: using allocated LPI pending table @0x0000000880e00000
[    5.828991] CPU80: Booted secondary processor 0x0000001002 [0x431f0af1]
[    5.829090] Bringup CPU80 0
[    5.829093] Bringup CPU81 left 175
[    5.897816] Detected PIPT I-cache on CPU81
[    5.897886] GICv3: CPU81: found redistributor 1102 region 0:0x00000004018c0000
[    5.897894] GICv3: CPU81: using allocated LPI pending table @0x0000000880e10000
[    5.897946] CPU81: Booted secondary processor 0x0000001102 [0x431f0af1]
[    5.898043] Bringup CPU81 0
[    5.898046] Bringup CPU82 left 174
[    5.966694] Detected PIPT I-cache on CPU82
[    5.966767] GICv3: CPU82: found redistributor 1202 region 0:0x0000000401940000
[    5.966776] GICv3: CPU82: using allocated LPI pending table @0x0000000880e20000
[    5.966828] CPU82: Booted secondary processor 0x0000001202 [0x431f0af1]
[    5.966925] Bringup CPU82 0
[    5.966927] Bringup CPU83 left 173
[    6.035887] Detected PIPT I-cache on CPU83
[    6.035962] GICv3: CPU83: found redistributor 1302 region 0:0x00000004019c0000
[    6.035971] GICv3: CPU83: using allocated LPI pending table @0x0000000880e30000
[    6.036023] CPU83: Booted secondary processor 0x0000001302 [0x431f0af1]
[    6.036118] Bringup CPU83 0
[    6.036121] Bringup CPU84 left 172
[    6.104513] Detected PIPT I-cache on CPU84
[    6.104588] GICv3: CPU84: found redistributor 1402 region 0:0x0000000401a40000
[    6.104597] GICv3: CPU84: using allocated LPI pending table @0x0000000880e40000
[    6.104650] CPU84: Booted secondary processor 0x0000001402 [0x431f0af1]
[    6.104745] Bringup CPU84 0
[    6.104747] Bringup CPU85 left 171
[    6.173344] Detected PIPT I-cache on CPU85
[    6.173421] GICv3: CPU85: found redistributor 1502 region 0:0x0000000401ac0000
[    6.173430] GICv3: CPU85: using allocated LPI pending table @0x0000000880e50000
[    6.173483] CPU85: Booted secondary processor 0x0000001502 [0x431f0af1]
[    6.173579] Bringup CPU85 0
[    6.173582] Bringup CPU86 left 170
[    6.242129] Detected PIPT I-cache on CPU86
[    6.242208] GICv3: CPU86: found redistributor 1602 region 0:0x0000000401b40000
[    6.242217] GICv3: CPU86: using allocated LPI pending table @0x0000000880e60000
[    6.242270] CPU86: Booted secondary processor 0x0000001602 [0x431f0af1]
[    6.242366] Bringup CPU86 0
[    6.242369] Bringup CPU87 left 169
[    6.310865] Detected PIPT I-cache on CPU87
[    6.310946] GICv3: CPU87: found redistributor 1702 region 0:0x0000000401bc0000
[    6.310955] GICv3: CPU87: using allocated LPI pending table @0x0000000880e70000
[    6.311007] CPU87: Booted secondary processor 0x0000001702 [0x431f0af1]
[    6.311104] Bringup CPU87 0
[    6.311107] Bringup CPU88 left 168
[    6.379680] Detected PIPT I-cache on CPU88
[    6.379765] GICv3: CPU88: found redistributor 1802 region 0:0x0000000401c40000
[    6.379774] GICv3: CPU88: using allocated LPI pending table @0x0000000880e80000
[    6.379827] CPU88: Booted secondary processor 0x0000001802 [0x431f0af1]
[    6.379924] Bringup CPU88 0
[    6.379927] Bringup CPU89 left 167
[    6.448290] Detected PIPT I-cache on CPU89
[    6.448376] GICv3: CPU89: found redistributor 1902 region 0:0x0000000401cc0000
[    6.448385] GICv3: CPU89: using allocated LPI pending table @0x0000000880e90000
[    6.448438] CPU89: Booted secondary processor 0x0000001902 [0x431f0af1]
[    6.448536] Bringup CPU89 0
[    6.448538] Bringup CPU90 left 166
[    6.517179] Detected PIPT I-cache on CPU90
[    6.517268] GICv3: CPU90: found redistributor 1a02 region 0:0x0000000401d40000
[    6.517277] GICv3: CPU90: using allocated LPI pending table @0x0000000880ea0000
[    6.517330] CPU90: Booted secondary processor 0x0000001a02 [0x431f0af1]
[    6.517426] Bringup CPU90 0
[    6.517429] Bringup CPU91 left 165
[    6.585792] Detected PIPT I-cache on CPU91
[    6.585881] GICv3: CPU91: found redistributor 1b02 region 0:0x0000000401dc0000
[    6.585890] GICv3: CPU91: using allocated LPI pending table @0x0000000880eb0000
[    6.585944] CPU91: Booted secondary processor 0x0000001b02 [0x431f0af1]
[    6.586065] Bringup CPU91 0
[    6.586069] Bringup CPU92 left 164
[    6.654569] Detected PIPT I-cache on CPU92
[    6.654660] GICv3: CPU92: found redistributor 1c02 region 0:0x0000000401e40000
[    6.654669] GICv3: CPU92: using allocated LPI pending table @0x0000000880ec0000
[    6.654722] CPU92: Booted secondary processor 0x0000001c02 [0x431f0af1]
[    6.654821] Bringup CPU92 0
[    6.654824] Bringup CPU93 left 163
[    6.723164] Detected PIPT I-cache on CPU93
[    6.723258] GICv3: CPU93: found redistributor 1d02 region 0:0x0000000401ec0000
[    6.723267] GICv3: CPU93: using allocated LPI pending table @0x0000000880ed0000
[    6.723320] CPU93: Booted secondary processor 0x0000001d02 [0x431f0af1]
[    6.723415] Bringup CPU93 0
[    6.723418] Bringup CPU94 left 162
[    6.791967] Detected PIPT I-cache on CPU94
[    6.792065] GICv3: CPU94: found redistributor 1e02 region 0:0x0000000401f40000
[    6.792074] GICv3: CPU94: using allocated LPI pending table @0x0000000880ee0000
[    6.792127] CPU94: Booted secondary processor 0x0000001e02 [0x431f0af1]
[    6.792226] Bringup CPU94 0
[    6.792229] Bringup CPU95 left 161
[    6.860520] Detected PIPT I-cache on CPU95
[    6.860618] GICv3: CPU95: found redistributor 1f02 region 0:0x0000000401fc0000
[    6.860627] GICv3: CPU95: using allocated LPI pending table @0x0000000880ef0000
[    6.860681] CPU95: Booted secondary processor 0x0000001f02 [0x431f0af1]
[    6.860777] Bringup CPU95 0
[    6.860780] Bringup CPU96 left 160
[    6.929652] Detected PIPT I-cache on CPU96
[    6.929700] GICv3: CPU96: found redistributor 3 region 0:0x0000000401060000
[    6.929710] GICv3: CPU96: using allocated LPI pending table @0x0000000880f00000
[    6.929763] CPU96: Booted secondary processor 0x0000000003 [0x431f0af1]
[    6.929866] Bringup CPU96 0
[    6.929869] Bringup CPU97 left 159
[    6.998236] Detected PIPT I-cache on CPU97
[    6.998279] GICv3: CPU97: found redistributor 103 region 0:0x00000004010e0000
[    6.998287] GICv3: CPU97: using allocated LPI pending table @0x0000000880f10000
[    6.998339] CPU97: Booted secondary processor 0x0000000103 [0x431f0af1]
[    6.998434] Bringup CPU97 0
[    6.998437] Bringup CPU98 left 158
[    7.066986] Detected PIPT I-cache on CPU98
[    7.067036] GICv3: CPU98: found redistributor 203 region 0:0x0000000401160000
[    7.067045] GICv3: CPU98: using allocated LPI pending table @0x0000000880f20000
[    7.067097] CPU98: Booted secondary processor 0x0000000203 [0x431f0af1]
[    7.067194] Bringup CPU98 0
[    7.067197] Bringup CPU99 left 157
[    7.135657] Detected PIPT I-cache on CPU99
[    7.135709] GICv3: CPU99: found redistributor 303 region 0:0x00000004011e0000
[    7.135717] GICv3: CPU99: using allocated LPI pending table @0x0000000880f30000
[    7.135770] CPU99: Booted secondary processor 0x0000000303 [0x431f0af1]
[    7.135870] Bringup CPU99 0
[    7.135872] Bringup CPU100 left 156
[    7.204283] Detected PIPT I-cache on CPU100
[    7.204337] GICv3: CPU100: found redistributor 403 region 0:0x0000000401260000
[    7.204346] GICv3: CPU100: using allocated LPI pending table @0x0000000880f40000
[    7.204399] CPU100: Booted secondary processor 0x0000000403 [0x431f0af1]
[    7.204496] Bringup CPU100 0
[    7.204499] Bringup CPU101 left 155
[    7.273135] Detected PIPT I-cache on CPU101
[    7.273191] GICv3: CPU101: found redistributor 503 region 0:0x00000004012e0000
[    7.273200] GICv3: CPU101: using allocated LPI pending table @0x0000000880f50000
[    7.273252] CPU101: Booted secondary processor 0x0000000503 [0x431f0af1]
[    7.273354] Bringup CPU101 0
[    7.273357] Bringup CPU102 left 154
[    7.341720] Detected PIPT I-cache on CPU102
[    7.341777] GICv3: CPU102: found redistributor 603 region 0:0x0000000401360000
[    7.341786] GICv3: CPU102: using allocated LPI pending table @0x0000000880f60000
[    7.341837] CPU102: Booted secondary processor 0x0000000603 [0x431f0af1]
[    7.341937] Bringup CPU102 0
[    7.341940] Bringup CPU103 left 153
[    7.410435] Detected PIPT I-cache on CPU103
[    7.410495] GICv3: CPU103: found redistributor 703 region 0:0x00000004013e0000
[    7.410503] GICv3: CPU103: using allocated LPI pending table @0x0000000880f70000
[    7.410556] CPU103: Booted secondary processor 0x0000000703 [0x431f0af1]
[    7.410653] Bringup CPU103 0
[    7.410655] Bringup CPU104 left 152
[    7.479258] Detected PIPT I-cache on CPU104
[    7.479319] GICv3: CPU104: found redistributor 803 region 0:0x0000000401460000
[    7.479328] GICv3: CPU104: using allocated LPI pending table @0x0000000880f80000
[    7.479381] CPU104: Booted secondary processor 0x0000000803 [0x431f0af1]
[    7.479479] Bringup CPU104 0
[    7.479482] Bringup CPU105 left 151
[    7.548112] Detected PIPT I-cache on CPU105
[    7.548174] GICv3: CPU105: found redistributor 903 region 0:0x00000004014e0000
[    7.548184] GICv3: CPU105: using allocated LPI pending table @0x0000000880f90000
[    7.548236] CPU105: Booted secondary processor 0x0000000903 [0x431f0af1]
[    7.548331] Bringup CPU105 0
[    7.548334] Bringup CPU106 left 150
[    7.616781] Detected PIPT I-cache on CPU106
[    7.616848] GICv3: CPU106: found redistributor a03 region 0:0x0000000401560000
[    7.616858] GICv3: CPU106: using allocated LPI pending table @0x0000000880fa0000
[    7.616910] CPU106: Booted secondary processor 0x0000000a03 [0x431f0af1]
[    7.617006] Bringup CPU106 0
[    7.617009] Bringup CPU107 left 149
[    7.685372] Detected PIPT I-cache on CPU107
[    7.685439] GICv3: CPU107: found redistributor b03 region 0:0x00000004015e0000
[    7.685449] GICv3: CPU107: using allocated LPI pending table @0x0000000880fb0000
[    7.685501] CPU107: Booted secondary processor 0x0000000b03 [0x431f0af1]
[    7.685626] Bringup CPU107 0
[    7.685629] Bringup CPU108 left 148
[    7.754320] Detected PIPT I-cache on CPU108
[    7.754389] GICv3: CPU108: found redistributor c03 region 0:0x0000000401660000
[    7.754398] GICv3: CPU108: using allocated LPI pending table @0x0000000880fc0000
[    7.754451] CPU108: Booted secondary processor 0x0000000c03 [0x431f0af1]
[    7.754548] Bringup CPU108 0
[    7.754551] Bringup CPU109 left 147
[    7.823128] Detected PIPT I-cache on CPU109
[    7.823199] GICv3: CPU109: found redistributor d03 region 0:0x00000004016e0000
[    7.823208] GICv3: CPU109: using allocated LPI pending table @0x0000000880fd0000
[    7.823260] CPU109: Booted secondary processor 0x0000000d03 [0x431f0af1]
[    7.823359] Bringup CPU109 0
[    7.823362] Bringup CPU110 left 146
[    7.891829] Detected PIPT I-cache on CPU110
[    7.891904] GICv3: CPU110: found redistributor e03 region 0:0x0000000401760000
[    7.891913] GICv3: CPU110: using allocated LPI pending table @0x0000000880fe0000
[    7.891965] CPU110: Booted secondary processor 0x0000000e03 [0x431f0af1]
[    7.892064] Bringup CPU110 0
[    7.892067] Bringup CPU111 left 145
[    7.960367] Detected PIPT I-cache on CPU111
[    7.960442] GICv3: CPU111: found redistributor f03 region 0:0x00000004017e0000
[    7.960451] GICv3: CPU111: using allocated LPI pending table @0x0000000880ff0000
[    7.960503] CPU111: Booted secondary processor 0x0000000f03 [0x431f0af1]
[    7.960600] Bringup CPU111 0
[    7.960603] Bringup CPU112 left 144
[    8.029232] Detected PIPT I-cache on CPU112
[    8.029310] GICv3: CPU112: found redistributor 1003 region 0:0x0000000401860000
[    8.029319] GICv3: CPU112: using allocated LPI pending table @0x0000000881000000
[    8.029370] CPU112: Booted secondary processor 0x0000001003 [0x431f0af1]
[    8.029471] Bringup CPU112 0
[    8.029474] Bringup CPU113 left 143
[    8.098113] Detected PIPT I-cache on CPU113
[    8.098193] GICv3: CPU113: found redistributor 1103 region 0:0x00000004018e0000
[    8.098203] GICv3: CPU113: using allocated LPI pending table @0x0000000881010000
[    8.098255] CPU113: Booted secondary processor 0x0000001103 [0x431f0af1]
[    8.098353] Bringup CPU113 0
[    8.098356] Bringup CPU114 left 142
[    8.167090] Detected PIPT I-cache on CPU114
[    8.167172] GICv3: CPU114: found redistributor 1203 region 0:0x0000000401960000
[    8.167181] GICv3: CPU114: using allocated LPI pending table @0x0000000881020000
[    8.167234] CPU114: Booted secondary processor 0x0000001203 [0x431f0af1]
[    8.167333] Bringup CPU114 0
[    8.167336] Bringup CPU115 left 141
[    8.236006] Detected PIPT I-cache on CPU115
[    8.236090] GICv3: CPU115: found redistributor 1303 region 0:0x00000004019e0000
[    8.236099] GICv3: CPU115: using allocated LPI pending table @0x0000000881030000
[    8.236152] CPU115: Booted secondary processor 0x0000001303 [0x431f0af1]
[    8.236252] Bringup CPU115 0
[    8.236255] Bringup CPU116 left 140
[    8.304797] Detected PIPT I-cache on CPU116
[    8.304883] GICv3: CPU116: found redistributor 1403 region 0:0x0000000401a60000
[    8.304892] GICv3: CPU116: using allocated LPI pending table @0x0000000881040000
[    8.304944] CPU116: Booted secondary processor 0x0000001403 [0x431f0af1]
[    8.305045] Bringup CPU116 0
[    8.305048] Bringup CPU117 left 139
[    8.373728] Detected PIPT I-cache on CPU117
[    8.373816] GICv3: CPU117: found redistributor 1503 region 0:0x0000000401ae0000
[    8.373826] GICv3: CPU117: using allocated LPI pending table @0x0000000881050000
[    8.373879] CPU117: Booted secondary processor 0x0000001503 [0x431f0af1]
[    8.373982] Bringup CPU117 0
[    8.373985] Bringup CPU118 left 138
[    8.442617] Detected PIPT I-cache on CPU118
[    8.442707] GICv3: CPU118: found redistributor 1603 region 0:0x0000000401b60000
[    8.442716] GICv3: CPU118: using allocated LPI pending table @0x0000000881060000
[    8.442768] CPU118: Booted secondary processor 0x0000001603 [0x431f0af1]
[    8.442868] Bringup CPU118 0
[    8.442871] Bringup CPU119 left 137
[    8.511459] Detected PIPT I-cache on CPU119
[    8.511550] GICv3: CPU119: found redistributor 1703 region 0:0x0000000401be0000
[    8.511560] GICv3: CPU119: using allocated LPI pending table @0x0000000881070000
[    8.511612] CPU119: Booted secondary processor 0x0000001703 [0x431f0af1]
[    8.511712] Bringup CPU119 0
[    8.511715] Bringup CPU120 left 136
[    8.580428] Detected PIPT I-cache on CPU120
[    8.580521] GICv3: CPU120: found redistributor 1803 region 0:0x0000000401c60000
[    8.580531] GICv3: CPU120: using allocated LPI pending table @0x0000000881080000
[    8.580583] CPU120: Booted secondary processor 0x0000001803 [0x431f0af1]
[    8.580682] Bringup CPU120 0
[    8.580685] Bringup CPU121 left 135
[    8.649122] Detected PIPT I-cache on CPU121
[    8.649218] GICv3: CPU121: found redistributor 1903 region 0:0x0000000401ce0000
[    8.649227] GICv3: CPU121: using allocated LPI pending table @0x0000000881090000
[    8.649279] CPU121: Booted secondary processor 0x0000001903 [0x431f0af1]
[    8.649379] Bringup CPU121 0
[    8.649382] Bringup CPU122 left 134
[    8.718460] Detected PIPT I-cache on CPU122
[    8.718558] GICv3: CPU122: found redistributor 1a03 region 0:0x0000000401d60000
[    8.718567] GICv3: CPU122: using allocated LPI pending table @0x00000008810a0000
[    8.718620] CPU122: Booted secondary processor 0x0000001a03 [0x431f0af1]
[    8.718751] Bringup CPU122 0
[    8.718754] Bringup CPU123 left 133
[    8.787419] Detected PIPT I-cache on CPU123
[    8.787519] GICv3: CPU123: found redistributor 1b03 region 0:0x0000000401de0000
[    8.787528] GICv3: CPU123: using allocated LPI pending table @0x00000008810b0000
[    8.787580] CPU123: Booted secondary processor 0x0000001b03 [0x431f0af1]
[    8.787683] Bringup CPU123 0
[    8.787686] Bringup CPU124 left 132
[    8.856255] Detected PIPT I-cache on CPU124
[    8.856357] GICv3: CPU124: found redistributor 1c03 region 0:0x0000000401e60000
[    8.856366] GICv3: CPU124: using allocated LPI pending table @0x00000008810c0000
[    8.856419] CPU124: Booted secondary processor 0x0000001c03 [0x431f0af1]
[    8.856518] Bringup CPU124 0
[    8.856522] Bringup CPU125 left 131
[    8.924931] Detected PIPT I-cache on CPU125
[    8.925034] GICv3: CPU125: found redistributor 1d03 region 0:0x0000000401ee0000
[    8.925044] GICv3: CPU125: using allocated LPI pending table @0x00000008810d0000
[    8.925095] CPU125: Booted secondary processor 0x0000001d03 [0x431f0af1]
[    8.925196] Bringup CPU125 0
[    8.925199] Bringup CPU126 left 130
[    8.993823] Detected PIPT I-cache on CPU126
[    8.993931] GICv3: CPU126: found redistributor 1e03 region 0:0x0000000401f60000
[    8.993941] GICv3: CPU126: using allocated LPI pending table @0x00000008810e0000
[    8.993993] CPU126: Booted secondary processor 0x0000001e03 [0x431f0af1]
[    8.994095] Bringup CPU126 0
[    8.994098] Bringup CPU127 left 129
[    9.062463] Detected PIPT I-cache on CPU127
[    9.062570] GICv3: CPU127: found redistributor 1f03 region 0:0x0000000401fe0000
[    9.062579] GICv3: CPU127: using allocated LPI pending table @0x00000008810f0000
[    9.062631] CPU127: Booted secondary processor 0x0000001f03 [0x431f0af1]
[    9.062734] Bringup CPU127 0
[    9.062737] Bringup CPU128 left 128
[    9.220384] Detected PIPT I-cache on CPU128
[    9.220561] GICv3: CPU128: found redistributor 10000 region 1:0x0000000441000000
[    9.220580] GICv3: CPU128: using allocated LPI pending table @0x0000000881100000
[    9.220666] CPU128: Booted secondary processor 0x0000010000 [0x431f0af1]
[    9.221068] Bringup CPU128 0
[    9.221072] Bringup CPU129 left 127
[    9.387854] Detected PIPT I-cache on CPU129
[    9.387987] GICv3: CPU129: found redistributor 10100 region 1:0x0000000441080000
[    9.387998] GICv3: CPU129: using allocated LPI pending table @0x0000000881110000
[    9.388068] CPU129: Booted secondary processor 0x0000010100 [0x431f0af1]
[    9.388215] Bringup CPU129 0
[    9.388219] Bringup CPU130 left 126
[    9.545912] Detected PIPT I-cache on CPU130
[    9.546046] GICv3: CPU130: found redistributor 10200 region 1:0x0000000441100000
[    9.546057] GICv3: CPU130: using allocated LPI pending table @0x0000000881120000
[    9.546126] CPU130: Booted secondary processor 0x0000010200 [0x431f0af1]
[    9.546275] Bringup CPU130 0
[    9.546279] Bringup CPU131 left 125
[    9.711527] Detected PIPT I-cache on CPU131
[    9.711666] GICv3: CPU131: found redistributor 10300 region 1:0x0000000441180000
[    9.711678] GICv3: CPU131: using allocated LPI pending table @0x0000000881130000
[    9.711750] CPU131: Booted secondary processor 0x0000010300 [0x431f0af1]
[    9.711898] Bringup CPU131 0
[    9.711901] Bringup CPU132 left 124
[    9.879364] Detected PIPT I-cache on CPU132
[    9.879505] GICv3: CPU132: found redistributor 10400 region 1:0x0000000441200000
[    9.879517] GICv3: CPU132: using allocated LPI pending table @0x0000000881140000
[    9.879589] CPU132: Booted secondary processor 0x0000010400 [0x431f0af1]
[    9.879805] Bringup CPU132 0
[    9.879809] Bringup CPU133 left 123
[   10.048128] Detected PIPT I-cache on CPU133
[   10.048269] GICv3: CPU133: found redistributor 10500 region 1:0x0000000441280000
[   10.048281] GICv3: CPU133: using allocated LPI pending table @0x0000000881150000
[   10.048350] CPU133: Booted secondary processor 0x0000010500 [0x431f0af1]
[   10.048498] Bringup CPU133 0
[   10.048502] Bringup CPU134 left 122
[   10.214683] Detected PIPT I-cache on CPU134
[   10.214828] GICv3: CPU134: found redistributor 10600 region 1:0x0000000441300000
[   10.214840] GICv3: CPU134: using allocated LPI pending table @0x0000000881160000
[   10.214910] CPU134: Booted secondary processor 0x0000010600 [0x431f0af1]
[   10.215055] Bringup CPU134 0
[   10.215059] Bringup CPU135 left 121
[   10.372067] Detected PIPT I-cache on CPU135
[   10.372213] GICv3: CPU135: found redistributor 10700 region 1:0x0000000441380000
[   10.372225] GICv3: CPU135: using allocated LPI pending table @0x0000000881170000
[   10.372295] CPU135: Booted secondary processor 0x0000010700 [0x431f0af1]
[   10.372444] Bringup CPU135 0
[   10.372448] Bringup CPU136 left 120
[   10.539095] Detected PIPT I-cache on CPU136
[   10.539244] GICv3: CPU136: found redistributor 10800 region 1:0x0000000441400000
[   10.539256] GICv3: CPU136: using allocated LPI pending table @0x0000000881180000
[   10.539325] CPU136: Booted secondary processor 0x0000010800 [0x431f0af1]
[   10.539471] Bringup CPU136 0
[   10.539474] Bringup CPU137 left 119
[   10.705053] Detected PIPT I-cache on CPU137
[   10.705204] GICv3: CPU137: found redistributor 10900 region 1:0x0000000441480000
[   10.705216] GICv3: CPU137: using allocated LPI pending table @0x0000000881190000
[   10.705285] CPU137: Booted secondary processor 0x0000010900 [0x431f0af1]
[   10.705430] Bringup CPU137 0
[   10.705434] Bringup CPU138 left 118
[   10.871710] Detected PIPT I-cache on CPU138
[   10.871863] GICv3: CPU138: found redistributor 10a00 region 1:0x0000000441500000
[   10.871875] GICv3: CPU138: using allocated LPI pending table @0x00000008811a0000
[   10.871944] CPU138: Booted secondary processor 0x0000010a00 [0x431f0af1]
[   10.872094] Bringup CPU138 0
[   10.872097] Bringup CPU139 left 117
[   11.037668] Detected PIPT I-cache on CPU139
[   11.037823] GICv3: CPU139: found redistributor 10b00 region 1:0x0000000441580000
[   11.037835] GICv3: CPU139: using allocated LPI pending table @0x00000008811b0000
[   11.037906] CPU139: Booted secondary processor 0x0000010b00 [0x431f0af1]
[   11.038125] Bringup CPU139 0
[   11.038129] Bringup CPU140 left 116
[   11.203709] Detected PIPT I-cache on CPU140
[   11.203866] GICv3: CPU140: found redistributor 10c00 region 1:0x0000000441600000
[   11.203878] GICv3: CPU140: using allocated LPI pending table @0x00000008811c0000
[   11.203948] CPU140: Booted secondary processor 0x0000010c00 [0x431f0af1]
[   11.204101] Bringup CPU140 0
[   11.204105] Bringup CPU141 left 115
[   11.370278] Detected PIPT I-cache on CPU141
[   11.370434] GICv3: CPU141: found redistributor 10d00 region 1:0x0000000441680000
[   11.370446] GICv3: CPU141: using allocated LPI pending table @0x00000008811d0000
[   11.370517] CPU141: Booted secondary processor 0x0000010d00 [0x431f0af1]
[   11.370669] Bringup CPU141 0
[   11.370672] Bringup CPU142 left 114
[   11.534969] Detected PIPT I-cache on CPU142
[   11.535132] GICv3: CPU142: found redistributor 10e00 region 1:0x0000000441700000
[   11.535144] GICv3: CPU142: using allocated LPI pending table @0x00000008811e0000
[   11.535213] CPU142: Booted secondary processor 0x0000010e00 [0x431f0af1]
[   11.535359] Bringup CPU142 0
[   11.535363] Bringup CPU143 left 113
[   11.701078] Detected PIPT I-cache on CPU143
[   11.701242] GICv3: CPU143: found redistributor 10f00 region 1:0x0000000441780000
[   11.701254] GICv3: CPU143: using allocated LPI pending table @0x00000008811f0000
[   11.701323] CPU143: Booted secondary processor 0x0000010f00 [0x431f0af1]
[   11.701468] Bringup CPU143 0
[   11.701472] Bringup CPU144 left 112
[   11.869090] Detected PIPT I-cache on CPU144
[   11.869258] GICv3: CPU144: found redistributor 11000 region 1:0x0000000441800000
[   11.869271] GICv3: CPU144: using allocated LPI pending table @0x0000000881200000
[   11.869341] CPU144: Booted secondary processor 0x0000011000 [0x431f0af1]
[   11.869491] Bringup CPU144 0
[   11.869495] Bringup CPU145 left 111
[   12.034171] Detected PIPT I-cache on CPU145
[   12.034340] GICv3: CPU145: found redistributor 11100 region 1:0x0000000441880000
[   12.034353] GICv3: CPU145: using allocated LPI pending table @0x0000000881210000
[   12.034423] CPU145: Booted secondary processor 0x0000011100 [0x431f0af1]
[   12.034650] Bringup CPU145 0
[   12.034654] Bringup CPU146 left 110
[   12.200891] Detected PIPT I-cache on CPU146
[   12.201062] GICv3: CPU146: found redistributor 11200 region 1:0x0000000441900000
[   12.201075] GICv3: CPU146: using allocated LPI pending table @0x0000000881220000
[   12.201144] CPU146: Booted secondary processor 0x0000011200 [0x431f0af1]
[   12.201300] Bringup CPU146 0
[   12.201304] Bringup CPU147 left 109
[   12.366599] Detected PIPT I-cache on CPU147
[   12.366771] GICv3: CPU147: found redistributor 11300 region 1:0x0000000441980000
[   12.366784] GICv3: CPU147: using allocated LPI pending table @0x0000000881230000
[   12.366854] CPU147: Booted secondary processor 0x0000011300 [0x431f0af1]
[   12.367007] Bringup CPU147 0
[   12.367011] Bringup CPU148 left 108
[   12.532735] Detected PIPT I-cache on CPU148
[   12.532911] GICv3: CPU148: found redistributor 11400 region 1:0x0000000441a00000
[   12.532924] GICv3: CPU148: using allocated LPI pending table @0x0000000881240000
[   12.532994] CPU148: Booted secondary processor 0x0000011400 [0x431f0af1]
[   12.533147] Bringup CPU148 0
[   12.533151] Bringup CPU149 left 107
[   12.699101] Detected PIPT I-cache on CPU149
[   12.699279] GICv3: CPU149: found redistributor 11500 region 1:0x0000000441a80000
[   12.699292] GICv3: CPU149: using allocated LPI pending table @0x0000000881250000
[   12.699363] CPU149: Booted secondary processor 0x0000011500 [0x431f0af1]
[   12.699518] Bringup CPU149 0
[   12.699522] Bringup CPU150 left 106
[   12.864456] Detected PIPT I-cache on CPU150
[   12.864634] GICv3: CPU150: found redistributor 11600 region 1:0x0000000441b00000
[   12.864647] GICv3: CPU150: using allocated LPI pending table @0x0000000881260000
[   12.864717] CPU150: Booted secondary processor 0x0000011600 [0x431f0af1]
[   12.864871] Bringup CPU150 0
[   12.864874] Bringup CPU151 left 105
[   13.032476] Detected PIPT I-cache on CPU151
[   13.032656] GICv3: CPU151: found redistributor 11700 region 1:0x0000000441b80000
[   13.032669] GICv3: CPU151: using allocated LPI pending table @0x0000000881270000
[   13.032739] CPU151: Booted secondary processor 0x0000011700 [0x431f0af1]
[   13.032893] Bringup CPU151 0
[   13.032897] Bringup CPU152 left 104
[   13.200736] Detected PIPT I-cache on CPU152
[   13.200918] GICv3: CPU152: found redistributor 11800 region 1:0x0000000441c00000
[   13.200931] GICv3: CPU152: using allocated LPI pending table @0x0000000881280000
[   13.201003] CPU152: Booted secondary processor 0x0000011800 [0x431f0af1]
[   13.201234] Bringup CPU152 0
[   13.201238] Bringup CPU153 left 103
[   13.369144] Detected PIPT I-cache on CPU153
[   13.369326] GICv3: CPU153: found redistributor 11900 region 1:0x0000000441c80000
[   13.369338] GICv3: CPU153: using allocated LPI pending table @0x0000000881290000
[   13.369410] CPU153: Booted secondary processor 0x0000011900 [0x431f0af1]
[   13.369566] Bringup CPU153 0
[   13.369570] Bringup CPU154 left 102
[   13.537130] Detected PIPT I-cache on CPU154
[   13.537316] GICv3: CPU154: found redistributor 11a00 region 1:0x0000000441d00000
[   13.537328] GICv3: CPU154: using allocated LPI pending table @0x00000008812a0000
[   13.537398] CPU154: Booted secondary processor 0x0000011a00 [0x431f0af1]
[   13.537551] Bringup CPU154 0
[   13.537555] Bringup CPU155 left 101
[   13.704667] Detected PIPT I-cache on CPU155
[   13.704852] GICv3: CPU155: found redistributor 11b00 region 1:0x0000000441d80000
[   13.704865] GICv3: CPU155: using allocated LPI pending table @0x00000008812b0000
[   13.704935] CPU155: Booted secondary processor 0x0000011b00 [0x431f0af1]
[   13.705088] Bringup CPU155 0
[   13.705091] Bringup CPU156 left 100
[   13.873414] Detected PIPT I-cache on CPU156
[   13.873602] GICv3: CPU156: found redistributor 11c00 region 1:0x0000000441e00000
[   13.873615] GICv3: CPU156: using allocated LPI pending table @0x00000008812c0000
[   13.873685] CPU156: Booted secondary processor 0x0000011c00 [0x431f0af1]
[   13.873841] Bringup CPU156 0
[   13.873845] Bringup CPU157 left 99
[   14.041030] Detected PIPT I-cache on CPU157
[   14.041223] GICv3: CPU157: found redistributor 11d00 region 1:0x0000000441e80000
[   14.041235] GICv3: CPU157: using allocated LPI pending table @0x00000008812d0000
[   14.041306] CPU157: Booted secondary processor 0x0000011d00 [0x431f0af1]
[   14.041458] Bringup CPU157 0
[   14.041462] Bringup CPU158 left 98
[   14.209238] Detected PIPT I-cache on CPU158
[   14.209434] GICv3: CPU158: found redistributor 11e00 region 1:0x0000000441f00000
[   14.209446] GICv3: CPU158: using allocated LPI pending table @0x00000008812e0000
[   14.209516] CPU158: Booted secondary processor 0x0000011e00 [0x431f0af1]
[   14.209751] Bringup CPU158 0
[   14.209755] Bringup CPU159 left 97
[   14.375193] Detected PIPT I-cache on CPU159
[   14.375393] GICv3: CPU159: found redistributor 11f00 region 1:0x0000000441f80000
[   14.375405] GICv3: CPU159: using allocated LPI pending table @0x00000008812f0000
[   14.375475] CPU159: Booted secondary processor 0x0000011f00 [0x431f0af1]
[   14.375633] Bringup CPU159 0
[   14.375637] Bringup CPU160 left 96
[   14.530664] Detected PIPT I-cache on CPU160
[   14.530800] GICv3: CPU160: found redistributor 10001 region 1:0x0000000441020000
[   14.530812] GICv3: CPU160: using allocated LPI pending table @0x0000000881300000
[   14.530880] CPU160: Booted secondary processor 0x0000010001 [0x431f0af1]
[   14.531019] Bringup CPU160 0
[   14.531022] Bringup CPU161 left 95
[   14.686047] Detected PIPT I-cache on CPU161
[   14.686189] GICv3: CPU161: found redistributor 10101 region 1:0x00000004410a0000
[   14.686200] GICv3: CPU161: using allocated LPI pending table @0x0000000881310000
[   14.686269] CPU161: Booted secondary processor 0x0000010101 [0x431f0af1]
[   14.686405] Bringup CPU161 0
[   14.686409] Bringup CPU162 left 94
[   14.841025] Detected PIPT I-cache on CPU162
[   14.841168] GICv3: CPU162: found redistributor 10201 region 1:0x0000000441120000
[   14.841179] GICv3: CPU162: using allocated LPI pending table @0x0000000881320000
[   14.841247] CPU162: Booted secondary processor 0x0000010201 [0x431f0af1]
[   14.841380] Bringup CPU162 0
[   14.841384] Bringup CPU163 left 93
[   14.996501] Detected PIPT I-cache on CPU163
[   14.996648] GICv3: CPU163: found redistributor 10301 region 1:0x00000004411a0000
[   14.996660] GICv3: CPU163: using allocated LPI pending table @0x0000000881330000
[   14.996726] CPU163: Booted secondary processor 0x0000010301 [0x431f0af1]
[   14.996868] Bringup CPU163 0
[   14.996871] Bringup CPU164 left 92
[   15.151407] Detected PIPT I-cache on CPU164
[   15.151552] GICv3: CPU164: found redistributor 10401 region 1:0x0000000441220000
[   15.151564] GICv3: CPU164: using allocated LPI pending table @0x0000000881340000
[   15.151633] CPU164: Booted secondary processor 0x0000010401 [0x431f0af1]
[   15.151776] Bringup CPU164 0
[   15.151780] Bringup CPU165 left 91
[   15.306915] Detected PIPT I-cache on CPU165
[   15.307063] GICv3: CPU165: found redistributor 10501 region 1:0x00000004412a0000
[   15.307075] GICv3: CPU165: using allocated LPI pending table @0x0000000881350000
[   15.307141] CPU165: Booted secondary processor 0x0000010501 [0x431f0af1]
[   15.307364] Bringup CPU165 0
[   15.307368] Bringup CPU166 left 90
[   15.461950] Detected PIPT I-cache on CPU166
[   15.462101] GICv3: CPU166: found redistributor 10601 region 1:0x0000000441320000
[   15.462113] GICv3: CPU166: using allocated LPI pending table @0x0000000881360000
[   15.462180] CPU166: Booted secondary processor 0x0000010601 [0x431f0af1]
[   15.462325] Bringup CPU166 0
[   15.462328] Bringup CPU167 left 89
[   15.616738] Detected PIPT I-cache on CPU167
[   15.616891] GICv3: CPU167: found redistributor 10701 region 1:0x00000004413a0000
[   15.616903] GICv3: CPU167: using allocated LPI pending table @0x0000000881370000
[   15.616971] CPU167: Booted secondary processor 0x0000010701 [0x431f0af1]
[   15.617107] Bringup CPU167 0
[   15.617111] Bringup CPU168 left 88
[   15.771979] Detected PIPT I-cache on CPU168
[   15.772133] GICv3: CPU168: found redistributor 10801 region 1:0x0000000441420000
[   15.772145] GICv3: CPU168: using allocated LPI pending table @0x0000000881380000
[   15.772215] CPU168: Booted secondary processor 0x0000010801 [0x431f0af1]
[   15.772355] Bringup CPU168 0
[   15.772359] Bringup CPU169 left 87
[   15.927422] Detected PIPT I-cache on CPU169
[   15.927578] GICv3: CPU169: found redistributor 10901 region 1:0x00000004414a0000
[   15.927590] GICv3: CPU169: using allocated LPI pending table @0x0000000881390000
[   15.927659] CPU169: Booted secondary processor 0x0000010901 [0x431f0af1]
[   15.927796] Bringup CPU169 0
[   15.927799] Bringup CPU170 left 86
[   16.082419] Detected PIPT I-cache on CPU170
[   16.082581] GICv3: CPU170: found redistributor 10a01 region 1:0x0000000441520000
[   16.082592] GICv3: CPU170: using allocated LPI pending table @0x00000008813a0000
[   16.082660] CPU170: Booted secondary processor 0x0000010a01 [0x431f0af1]
[   16.082800] Bringup CPU170 0
[   16.082803] Bringup CPU171 left 85
[   16.238203] Detected PIPT I-cache on CPU171
[   16.238364] GICv3: CPU171: found redistributor 10b01 region 1:0x00000004415a0000
[   16.238376] GICv3: CPU171: using allocated LPI pending table @0x00000008813b0000
[   16.238445] CPU171: Booted secondary processor 0x0000010b01 [0x431f0af1]
[   16.238585] Bringup CPU171 0
[   16.238589] Bringup CPU172 left 84
[   16.393091] Detected PIPT I-cache on CPU172
[   16.393253] GICv3: CPU172: found redistributor 10c01 region 1:0x0000000441620000
[   16.393265] GICv3: CPU172: using allocated LPI pending table @0x00000008813c0000
[   16.393334] CPU172: Booted secondary processor 0x0000010c01 [0x431f0af1]
[   16.393560] Bringup CPU172 0
[   16.393564] Bringup CPU173 left 83
[   16.548527] Detected PIPT I-cache on CPU173
[   16.548690] GICv3: CPU173: found redistributor 10d01 region 1:0x00000004416a0000
[   16.548702] GICv3: CPU173: using allocated LPI pending table @0x00000008813d0000
[   16.548769] CPU173: Booted secondary processor 0x0000010d01 [0x431f0af1]
[   16.548917] Bringup CPU173 0
[   16.548920] Bringup CPU174 left 82
[   16.703287] Detected PIPT I-cache on CPU174
[   16.703458] GICv3: CPU174: found redistributor 10e01 region 1:0x0000000441720000
[   16.703470] GICv3: CPU174: using allocated LPI pending table @0x00000008813e0000
[   16.703537] CPU174: Booted secondary processor 0x0000010e01 [0x431f0af1]
[   16.703675] Bringup CPU174 0
[   16.703678] Bringup CPU175 left 81
[   16.858143] Detected PIPT I-cache on CPU175
[   16.858311] GICv3: CPU175: found redistributor 10f01 region 1:0x00000004417a0000
[   16.858323] GICv3: CPU175: using allocated LPI pending table @0x00000008813f0000
[   16.858391] CPU175: Booted secondary processor 0x0000010f01 [0x431f0af1]
[   16.858533] Bringup CPU175 0
[   16.858536] Bringup CPU176 left 80
[   17.013712] Detected PIPT I-cache on CPU176
[   17.013881] GICv3: CPU176: found redistributor 11001 region 1:0x0000000441820000
[   17.013893] GICv3: CPU176: using allocated LPI pending table @0x0000000881400000
[   17.013960] CPU176: Booted secondary processor 0x0000011001 [0x431f0af1]
[   17.014098] Bringup CPU176 0
[   17.014101] Bringup CPU177 left 79
[   17.169073] Detected PIPT I-cache on CPU177
[   17.169243] GICv3: CPU177: found redistributor 11101 region 1:0x00000004418a0000
[   17.169255] GICv3: CPU177: using allocated LPI pending table @0x0000000881410000
[   17.169323] CPU177: Booted secondary processor 0x0000011101 [0x431f0af1]
[   17.169459] Bringup CPU177 0
[   17.169463] Bringup CPU178 left 78
[   17.324558] Detected PIPT I-cache on CPU178
[   17.324730] GICv3: CPU178: found redistributor 11201 region 1:0x0000000441920000
[   17.324742] GICv3: CPU178: using allocated LPI pending table @0x0000000881420000
[   17.324809] CPU178: Booted secondary processor 0x0000011201 [0x431f0af1]
[   17.325040] Bringup CPU178 0
[   17.325043] Bringup CPU179 left 77
[   17.479665] Detected PIPT I-cache on CPU179
[   17.479840] GICv3: CPU179: found redistributor 11301 region 1:0x00000004419a0000
[   17.479852] GICv3: CPU179: using allocated LPI pending table @0x0000000881430000
[   17.479923] CPU179: Booted secondary processor 0x0000011301 [0x431f0af1]
[   17.480070] Bringup CPU179 0
[   17.480074] Bringup CPU180 left 76
[   17.634812] Detected PIPT I-cache on CPU180
[   17.634993] GICv3: CPU180: found redistributor 11401 region 1:0x0000000441a20000
[   17.635005] GICv3: CPU180: using allocated LPI pending table @0x0000000881440000
[   17.635073] CPU180: Booted secondary processor 0x0000011401 [0x431f0af1]
[   17.635216] Bringup CPU180 0
[   17.635219] Bringup CPU181 left 75
[   17.790362] Detected PIPT I-cache on CPU181
[   17.790547] GICv3: CPU181: found redistributor 11501 region 1:0x0000000441aa0000
[   17.790559] GICv3: CPU181: using allocated LPI pending table @0x0000000881450000
[   17.790627] CPU181: Booted secondary processor 0x0000011501 [0x431f0af1]
[   17.790770] Bringup CPU181 0
[   17.790774] Bringup CPU182 left 74
[   17.945435] Detected PIPT I-cache on CPU182
[   17.945616] GICv3: CPU182: found redistributor 11601 region 1:0x0000000441b20000
[   17.945628] GICv3: CPU182: using allocated LPI pending table @0x0000000881460000
[   17.945695] CPU182: Booted secondary processor 0x0000011601 [0x431f0af1]
[   17.945839] Bringup CPU182 0
[   17.945842] Bringup CPU183 left 73
[   18.100344] Detected PIPT I-cache on CPU183
[   18.100525] GICv3: CPU183: found redistributor 11701 region 1:0x0000000441ba0000
[   18.100537] GICv3: CPU183: using allocated LPI pending table @0x0000000881470000
[   18.100606] CPU183: Booted secondary processor 0x0000011701 [0x431f0af1]
[   18.100751] Bringup CPU183 0
[   18.100755] Bringup CPU184 left 72
[   18.255485] Detected PIPT I-cache on CPU184
[   18.255675] GICv3: CPU184: found redistributor 11801 region 1:0x0000000441c20000
[   18.255688] GICv3: CPU184: using allocated LPI pending table @0x0000000881480000
[   18.255755] CPU184: Booted secondary processor 0x0000011801 [0x431f0af1]
[   18.255896] Bringup CPU184 0
[   18.255899] Bringup CPU185 left 71
[   18.410979] Detected PIPT I-cache on CPU185
[   18.411166] GICv3: CPU185: found redistributor 11901 region 1:0x0000000441ca0000
[   18.411178] GICv3: CPU185: using allocated LPI pending table @0x0000000881490000
[   18.411246] CPU185: Booted secondary processor 0x0000011901 [0x431f0af1]
[   18.411485] Bringup CPU185 0
[   18.411489] Bringup CPU186 left 70
[   18.566521] Detected PIPT I-cache on CPU186
[   18.566712] GICv3: CPU186: found redistributor 11a01 region 1:0x0000000441d20000
[   18.566724] GICv3: CPU186: using allocated LPI pending table @0x00000008814a0000
[   18.566791] CPU186: Booted secondary processor 0x0000011a01 [0x431f0af1]
[   18.566944] Bringup CPU186 0
[   18.566947] Bringup CPU187 left 69
[   18.721510] Detected PIPT I-cache on CPU187
[   18.721706] GICv3: CPU187: found redistributor 11b01 region 1:0x0000000441da0000
[   18.721718] GICv3: CPU187: using allocated LPI pending table @0x00000008814b0000
[   18.721787] CPU187: Booted secondary processor 0x0000011b01 [0x431f0af1]
[   18.721931] Bringup CPU187 0
[   18.721934] Bringup CPU188 left 68
[   18.877191] Detected PIPT I-cache on CPU188
[   18.877386] GICv3: CPU188: found redistributor 11c01 region 1:0x0000000441e20000
[   18.877399] GICv3: CPU188: using allocated LPI pending table @0x00000008814c0000
[   18.877467] CPU188: Booted secondary processor 0x0000011c01 [0x431f0af1]
[   18.877612] Bringup CPU188 0
[   18.877616] Bringup CPU189 left 67
[   19.032656] Detected PIPT I-cache on CPU189
[   19.032852] GICv3: CPU189: found redistributor 11d01 region 1:0x0000000441ea0000
[   19.032863] GICv3: CPU189: using allocated LPI pending table @0x00000008814d0000
[   19.032931] CPU189: Booted secondary processor 0x0000011d01 [0x431f0af1]
[   19.033071] Bringup CPU189 0
[   19.033074] Bringup CPU190 left 66
[   19.188112] Detected PIPT I-cache on CPU190
[   19.188314] GICv3: CPU190: found redistributor 11e01 region 1:0x0000000441f20000
[   19.188326] GICv3: CPU190: using allocated LPI pending table @0x00000008814e0000
[   19.188394] CPU190: Booted secondary processor 0x0000011e01 [0x431f0af1]
[   19.188532] Bringup CPU190 0
[   19.188536] Bringup CPU191 left 65
[   19.343549] Detected PIPT I-cache on CPU191
[   19.343747] GICv3: CPU191: found redistributor 11f01 region 1:0x0000000441fa0000
[   19.343759] GICv3: CPU191: using allocated LPI pending table @0x00000008814f0000
[   19.343826] CPU191: Booted secondary processor 0x0000011f01 [0x431f0af1]
[   19.344062] Bringup CPU191 0
[   19.344066] Bringup CPU192 left 64
[   19.499181] Detected PIPT I-cache on CPU192
[   19.499333] GICv3: CPU192: found redistributor 10002 region 1:0x0000000441040000
[   19.499344] GICv3: CPU192: using allocated LPI pending table @0x0000000881500000
[   19.499410] CPU192: Booted secondary processor 0x0000010002 [0x431f0af1]
[   19.499555] Bringup CPU192 0
[   19.499559] Bringup CPU193 left 63
[   19.654687] Detected PIPT I-cache on CPU193
[   19.654833] GICv3: CPU193: found redistributor 10102 region 1:0x00000004410c0000
[   19.654845] GICv3: CPU193: using allocated LPI pending table @0x0000000881510000
[   19.654914] CPU193: Booted secondary processor 0x0000010102 [0x431f0af1]
[   19.655055] Bringup CPU193 0
[   19.655058] Bringup CPU194 left 62
[   19.809738] Detected PIPT I-cache on CPU194
[   19.809888] GICv3: CPU194: found redistributor 10202 region 1:0x0000000441140000
[   19.809899] GICv3: CPU194: using allocated LPI pending table @0x0000000881520000
[   19.809966] CPU194: Booted secondary processor 0x0000010202 [0x431f0af1]
[   19.810110] Bringup CPU194 0
[   19.810114] Bringup CPU195 left 61
[   19.964932] Detected PIPT I-cache on CPU195
[   19.965086] GICv3: CPU195: found redistributor 10302 region 1:0x00000004411c0000
[   19.965097] GICv3: CPU195: using allocated LPI pending table @0x0000000881530000
[   19.965164] CPU195: Booted secondary processor 0x0000010302 [0x431f0af1]
[   19.965304] Bringup CPU195 0
[   19.965307] Bringup CPU196 left 60
[   20.119941] Detected PIPT I-cache on CPU196
[   20.120094] GICv3: CPU196: found redistributor 10402 region 1:0x0000000441240000
[   20.120106] GICv3: CPU196: using allocated LPI pending table @0x0000000881540000
[   20.120174] CPU196: Booted secondary processor 0x0000010402 [0x431f0af1]
[   20.120315] Bringup CPU196 0
[   20.120319] Bringup CPU197 left 59
[   20.275680] Detected PIPT I-cache on CPU197
[   20.275841] GICv3: CPU197: found redistributor 10502 region 1:0x00000004412c0000
[   20.275852] GICv3: CPU197: using allocated LPI pending table @0x0000000881550000
[   20.275919] CPU197: Booted secondary processor 0x0000010502 [0x431f0af1]
[   20.276064] Bringup CPU197 0
[   20.276068] Bringup CPU198 left 58
[   20.430754] Detected PIPT I-cache on CPU198
[   20.430913] GICv3: CPU198: found redistributor 10602 region 1:0x0000000441340000
[   20.430924] GICv3: CPU198: using allocated LPI pending table @0x0000000881560000
[   20.430992] CPU198: Booted secondary processor 0x0000010602 [0x431f0af1]
[   20.431230] Bringup CPU198 0
[   20.431234] Bringup CPU199 left 57
[   20.585910] Detected PIPT I-cache on CPU199
[   20.586072] GICv3: CPU199: found redistributor 10702 region 1:0x00000004413c0000
[   20.586085] GICv3: CPU199: using allocated LPI pending table @0x0000000881570000
[   20.586150] CPU199: Booted secondary processor 0x0000010702 [0x431f0af1]
[   20.586298] Bringup CPU199 0
[   20.586301] Bringup CPU200 left 56
[   20.741300] Detected PIPT I-cache on CPU200
[   20.741469] GICv3: CPU200: found redistributor 10802 region 1:0x0000000441440000
[   20.741481] GICv3: CPU200: using allocated LPI pending table @0x0000000881580000
[   20.741548] CPU200: Booted secondary processor 0x0000010802 [0x431f0af1]
[   20.741690] Bringup CPU200 0
[   20.741694] Bringup CPU201 left 55
[   20.896632] Detected PIPT I-cache on CPU201
[   20.896796] GICv3: CPU201: found redistributor 10902 region 1:0x00000004414c0000
[   20.896808] GICv3: CPU201: using allocated LPI pending table @0x0000000881590000
[   20.896874] CPU201: Booted secondary processor 0x0000010902 [0x431f0af1]
[   20.897016] Bringup CPU201 0
[   20.897020] Bringup CPU202 left 54
[   21.051752] Detected PIPT I-cache on CPU202
[   21.051918] GICv3: CPU202: found redistributor 10a02 region 1:0x0000000441540000
[   21.051930] GICv3: CPU202: using allocated LPI pending table @0x00000008815a0000
[   21.051998] CPU202: Booted secondary processor 0x0000010a02 [0x431f0af1]
[   21.052138] Bringup CPU202 0
[   21.052142] Bringup CPU203 left 53
[   21.207990] Detected PIPT I-cache on CPU203
[   21.208156] GICv3: CPU203: found redistributor 10b02 region 1:0x00000004415c0000
[   21.208168] GICv3: CPU203: using allocated LPI pending table @0x00000008815b0000
[   21.208235] CPU203: Booted secondary processor 0x0000010b02 [0x431f0af1]
[   21.208377] Bringup CPU203 0
[   21.208380] Bringup CPU204 left 52
[   21.363111] Detected PIPT I-cache on CPU204
[   21.363279] GICv3: CPU204: found redistributor 10c02 region 1:0x0000000441640000
[   21.363292] GICv3: CPU204: using allocated LPI pending table @0x00000008815c0000
[   21.363359] CPU204: Booted secondary processor 0x0000010c02 [0x431f0af1]
[   21.363598] Bringup CPU204 0
[   21.363601] Bringup CPU205 left 51
[   21.518927] Detected PIPT I-cache on CPU205
[   21.519098] GICv3: CPU205: found redistributor 10d02 region 1:0x00000004416c0000
[   21.519110] GICv3: CPU205: using allocated LPI pending table @0x00000008815d0000
[   21.519177] CPU205: Booted secondary processor 0x0000010d02 [0x431f0af1]
[   21.519326] Bringup CPU205 0
[   21.519329] Bringup CPU206 left 50
[   21.673907] Detected PIPT I-cache on CPU206
[   21.674079] GICv3: CPU206: found redistributor 10e02 region 1:0x0000000441740000
[   21.674091] GICv3: CPU206: using allocated LPI pending table @0x00000008815e0000
[   21.674158] CPU206: Booted secondary processor 0x0000010e02 [0x431f0af1]
[   21.674299] Bringup CPU206 0
[   21.674302] Bringup CPU207 left 49
[   21.828905] Detected PIPT I-cache on CPU207
[   21.829083] GICv3: CPU207: found redistributor 10f02 region 1:0x00000004417c0000
[   21.829095] GICv3: CPU207: using allocated LPI pending table @0x00000008815f0000
[   21.829162] CPU207: Booted secondary processor 0x0000010f02 [0x431f0af1]
[   21.829303] Bringup CPU207 0
[   21.829307] Bringup CPU208 left 48
[   21.984625] Detected PIPT I-cache on CPU208
[   21.984804] GICv3: CPU208: found redistributor 11002 region 1:0x0000000441840000
[   21.984817] GICv3: CPU208: using allocated LPI pending table @0x0000000881600000
[   21.984883] CPU208: Booted secondary processor 0x0000011002 [0x431f0af1]
[   21.985027] Bringup CPU208 0
[   21.985030] Bringup CPU209 left 47
[   22.140149] Detected PIPT I-cache on CPU209
[   22.140328] GICv3: CPU209: found redistributor 11102 region 1:0x00000004418c0000
[   22.140340] GICv3: CPU209: using allocated LPI pending table @0x0000000881610000
[   22.140407] CPU209: Booted secondary processor 0x0000011102 [0x431f0af1]
[   22.140551] Bringup CPU209 0
[   22.140554] Bringup CPU210 left 46
[   22.295766] Detected PIPT I-cache on CPU210
[   22.295948] GICv3: CPU210: found redistributor 11202 region 1:0x0000000441940000
[   22.295960] GICv3: CPU210: using allocated LPI pending table @0x0000000881620000
[   22.296028] CPU210: Booted secondary processor 0x0000011202 [0x431f0af1]
[   22.296176] Bringup CPU210 0
[   22.296180] Bringup CPU211 left 45
[   22.450955] Detected PIPT I-cache on CPU211
[   22.451137] GICv3: CPU211: found redistributor 11302 region 1:0x00000004419c0000
[   22.451150] GICv3: CPU211: using allocated LPI pending table @0x0000000881630000
[   22.451216] CPU211: Booted secondary processor 0x0000011302 [0x431f0af1]
[   22.451467] Bringup CPU211 0
[   22.451471] Bringup CPU212 left 44
[   22.606322] Detected PIPT I-cache on CPU212
[   22.606507] GICv3: CPU212: found redistributor 11402 region 1:0x0000000441a40000
[   22.606519] GICv3: CPU212: using allocated LPI pending table @0x0000000881640000
[   22.606587] CPU212: Booted secondary processor 0x0000011402 [0x431f0af1]
[   22.606742] Bringup CPU212 0
[   22.606746] Bringup CPU213 left 43
[   22.762153] Detected PIPT I-cache on CPU213
[   22.762342] GICv3: CPU213: found redistributor 11502 region 1:0x0000000441ac0000
[   22.762354] GICv3: CPU213: using allocated LPI pending table @0x0000000881650000
[   22.762422] CPU213: Booted secondary processor 0x0000011502 [0x431f0af1]
[   22.762565] Bringup CPU213 0
[   22.762569] Bringup CPU214 left 42
[   22.917237] Detected PIPT I-cache on CPU214
[   22.917427] GICv3: CPU214: found redistributor 11602 region 1:0x0000000441b40000
[   22.917439] GICv3: CPU214: using allocated LPI pending table @0x0000000881660000
[   22.917506] CPU214: Booted secondary processor 0x0000011602 [0x431f0af1]
[   22.917647] Bringup CPU214 0
[   22.917651] Bringup CPU215 left 41
[   23.072267] Detected PIPT I-cache on CPU215
[   23.072458] GICv3: CPU215: found redistributor 11702 region 1:0x0000000441bc0000
[   23.072470] GICv3: CPU215: using allocated LPI pending table @0x0000000881670000
[   23.072537] CPU215: Booted secondary processor 0x0000011702 [0x431f0af1]
[   23.072678] Bringup CPU215 0
[   23.072682] Bringup CPU216 left 40
[   23.227546] Detected PIPT I-cache on CPU216
[   23.227739] GICv3: CPU216: found redistributor 11802 region 1:0x0000000441c40000
[   23.227752] GICv3: CPU216: using allocated LPI pending table @0x0000000881680000
[   23.227818] CPU216: Booted secondary processor 0x0000011802 [0x431f0af1]
[   23.227963] Bringup CPU216 0
[   23.227966] Bringup CPU217 left 39
[   23.383191] Detected PIPT I-cache on CPU217
[   23.383384] GICv3: CPU217: found redistributor 11902 region 1:0x0000000441cc0000
[   23.383396] GICv3: CPU217: using allocated LPI pending table @0x0000000881690000
[   23.383464] CPU217: Booted secondary processor 0x0000011902 [0x431f0af1]
[   23.383608] Bringup CPU217 0
[   23.383612] Bringup CPU218 left 38
[   23.538788] Detected PIPT I-cache on CPU218
[   23.538985] GICv3: CPU218: found redistributor 11a02 region 1:0x0000000441d40000
[   23.538997] GICv3: CPU218: using allocated LPI pending table @0x00000008816a0000
[   23.539066] CPU218: Booted secondary processor 0x0000011a02 [0x431f0af1]
[   23.539317] Bringup CPU218 0
[   23.539321] Bringup CPU219 left 37
[   23.694100] Detected PIPT I-cache on CPU219
[   23.694299] GICv3: CPU219: found redistributor 11b02 region 1:0x0000000441dc0000
[   23.694312] GICv3: CPU219: using allocated LPI pending table @0x00000008816b0000
[   23.694380] CPU219: Booted secondary processor 0x0000011b02 [0x431f0af1]
[   23.694532] Bringup CPU219 0
[   23.694536] Bringup CPU220 left 36
[   23.850295] Detected PIPT I-cache on CPU220
[   23.850496] GICv3: CPU220: found redistributor 11c02 region 1:0x0000000441e40000
[   23.850509] GICv3: CPU220: using allocated LPI pending table @0x00000008816c0000
[   23.850576] CPU220: Booted secondary processor 0x0000011c02 [0x431f0af1]
[   23.850723] Bringup CPU220 0
[   23.850727] Bringup CPU221 left 35
[   24.005688] Detected PIPT I-cache on CPU221
[   24.005888] GICv3: CPU221: found redistributor 11d02 region 1:0x0000000441ec0000
[   24.005901] GICv3: CPU221: using allocated LPI pending table @0x00000008816d0000
[   24.005968] CPU221: Booted secondary processor 0x0000011d02 [0x431f0af1]
[   24.006112] Bringup CPU221 0
[   24.006115] Bringup CPU222 left 34
[   24.161241] Detected PIPT I-cache on CPU222
[   24.161448] GICv3: CPU222: found redistributor 11e02 region 1:0x0000000441f40000
[   24.161461] GICv3: CPU222: using allocated LPI pending table @0x00000008816e0000
[   24.161529] CPU222: Booted secondary processor 0x0000011e02 [0x431f0af1]
[   24.161672] Bringup CPU222 0
[   24.161675] Bringup CPU223 left 33
[   24.316729] Detected PIPT I-cache on CPU223
[   24.316935] GICv3: CPU223: found redistributor 11f02 region 1:0x0000000441fc0000
[   24.316948] GICv3: CPU223: using allocated LPI pending table @0x00000008816f0000
[   24.317015] CPU223: Booted secondary processor 0x0000011f02 [0x431f0af1]
[   24.317163] Bringup CPU223 0
[   24.317167] Bringup CPU224 left 32
[   24.472373] Detected PIPT I-cache on CPU224
[   24.472527] GICv3: CPU224: found redistributor 10003 region 1:0x0000000441060000
[   24.472540] GICv3: CPU224: using allocated LPI pending table @0x0000000881700000
[   24.472606] CPU224: Booted secondary processor 0x0000010003 [0x431f0af1]
[   24.472865] Bringup CPU224 0
[   24.472869] Bringup CPU225 left 31
[   24.628094] Detected PIPT I-cache on CPU225
[   24.628251] GICv3: CPU225: found redistributor 10103 region 1:0x00000004410e0000
[   24.628263] GICv3: CPU225: using allocated LPI pending table @0x0000000881710000
[   24.628331] CPU225: Booted secondary processor 0x0000010103 [0x431f0af1]
[   24.628485] Bringup CPU225 0
[   24.628489] Bringup CPU226 left 30
[   24.783218] Detected PIPT I-cache on CPU226
[   24.783376] GICv3: CPU226: found redistributor 10203 region 1:0x0000000441160000
[   24.783389] GICv3: CPU226: using allocated LPI pending table @0x0000000881720000
[   24.783455] CPU226: Booted secondary processor 0x0000010203 [0x431f0af1]
[   24.783602] Bringup CPU226 0
[   24.783606] Bringup CPU227 left 29
[   24.938519] Detected PIPT I-cache on CPU227
[   24.938679] GICv3: CPU227: found redistributor 10303 region 1:0x00000004411e0000
[   24.938692] GICv3: CPU227: using allocated LPI pending table @0x0000000881730000
[   24.938758] CPU227: Booted secondary processor 0x0000010303 [0x431f0af1]
[   24.938902] Bringup CPU227 0
[   24.938906] Bringup CPU228 left 28
[   25.093622] Detected PIPT I-cache on CPU228
[   25.093789] GICv3: CPU228: found redistributor 10403 region 1:0x0000000441260000
[   25.093802] GICv3: CPU228: using allocated LPI pending table @0x0000000881740000
[   25.093868] CPU228: Booted secondary processor 0x0000010403 [0x431f0af1]
[   25.094015] Bringup CPU228 0
[   25.094019] Bringup CPU229 left 27
[   25.249382] Detected PIPT I-cache on CPU229
[   25.249548] GICv3: CPU229: found redistributor 10503 region 1:0x00000004412e0000
[   25.249560] GICv3: CPU229: using allocated LPI pending table @0x0000000881750000
[   25.249629] CPU229: Booted secondary processor 0x0000010503 [0x431f0af1]
[   25.249772] Bringup CPU229 0
[   25.249776] Bringup CPU230 left 26
[   25.404562] Detected PIPT I-cache on CPU230
[   25.404728] GICv3: CPU230: found redistributor 10603 region 1:0x0000000441360000
[   25.404741] GICv3: CPU230: using allocated LPI pending table @0x0000000881760000
[   25.404808] CPU230: Booted secondary processor 0x0000010603 [0x431f0af1]
[   25.404951] Bringup CPU230 0
[   25.404955] Bringup CPU231 left 25
[   25.559585] Detected PIPT I-cache on CPU231
[   25.559755] GICv3: CPU231: found redistributor 10703 region 1:0x00000004413e0000
[   25.559768] GICv3: CPU231: using allocated LPI pending table @0x0000000881770000
[   25.559837] CPU231: Booted secondary processor 0x0000010703 [0x431f0af1]
[   25.560097] Bringup CPU231 0
[   25.560101] Bringup CPU232 left 24
[   25.715151] Detected PIPT I-cache on CPU232
[   25.715321] GICv3: CPU232: found redistributor 10803 region 1:0x0000000441460000
[   25.715334] GICv3: CPU232: using allocated LPI pending table @0x0000000881780000
[   25.715404] CPU232: Booted secondary processor 0x0000010803 [0x431f0af1]
[   25.715556] Bringup CPU232 0
[   25.715560] Bringup CPU233 left 23
[   25.870587] Detected PIPT I-cache on CPU233
[   25.870759] GICv3: CPU233: found redistributor 10903 region 1:0x00000004414e0000
[   25.870771] GICv3: CPU233: using allocated LPI pending table @0x0000000881790000
[   25.870839] CPU233: Booted secondary processor 0x0000010903 [0x431f0af1]
[   25.870978] Bringup CPU233 0
[   25.870982] Bringup CPU234 left 22
[   26.026220] Detected PIPT I-cache on CPU234
[   26.026396] GICv3: CPU234: found redistributor 10a03 region 1:0x0000000441560000
[   26.026409] GICv3: CPU234: using allocated LPI pending table @0x00000008817a0000
[   26.026475] CPU234: Booted secondary processor 0x0000010a03 [0x431f0af1]
[   26.026623] Bringup CPU234 0
[   26.026627] Bringup CPU235 left 21
[   26.182382] Detected PIPT I-cache on CPU235
[   26.182559] GICv3: CPU235: found redistributor 10b03 region 1:0x00000004415e0000
[   26.182572] GICv3: CPU235: using allocated LPI pending table @0x00000008817b0000
[   26.182639] CPU235: Booted secondary processor 0x0000010b03 [0x431f0af1]
[   26.182782] Bringup CPU235 0
[   26.182785] Bringup CPU236 left 20
[   26.337520] Detected PIPT I-cache on CPU236
[   26.337697] GICv3: CPU236: found redistributor 10c03 region 1:0x0000000441660000
[   26.337711] GICv3: CPU236: using allocated LPI pending table @0x00000008817c0000
[   26.337777] CPU236: Booted secondary processor 0x0000010c03 [0x431f0af1]
[   26.337925] Bringup CPU236 0
[   26.337929] Bringup CPU237 left 19
[   26.493156] Detected PIPT I-cache on CPU237
[   26.493336] GICv3: CPU237: found redistributor 10d03 region 1:0x00000004416e0000
[   26.493349] GICv3: CPU237: using allocated LPI pending table @0x00000008817d0000
[   26.493418] CPU237: Booted secondary processor 0x0000010d03 [0x431f0af1]
[   26.493688] Bringup CPU237 0
[   26.493691] Bringup CPU238 left 18
[   26.648336] Detected PIPT I-cache on CPU238
[   26.648520] GICv3: CPU238: found redistributor 10e03 region 1:0x0000000441760000
[   26.648533] GICv3: CPU238: using allocated LPI pending table @0x00000008817e0000
[   26.648601] CPU238: Booted secondary processor 0x0000010e03 [0x431f0af1]
[   26.648757] Bringup CPU238 0
[   26.648761] Bringup CPU239 left 17
[   26.803436] Detected PIPT I-cache on CPU239
[   26.803620] GICv3: CPU239: found redistributor 10f03 region 1:0x00000004417e0000
[   26.803633] GICv3: CPU239: using allocated LPI pending table @0x00000008817f0000
[   26.803700] CPU239: Booted secondary processor 0x0000010f03 [0x431f0af1]
[   26.803842] Bringup CPU239 0
[   26.803845] Bringup CPU240 left 16
[   26.959245] Detected PIPT I-cache on CPU240
[   26.959431] GICv3: CPU240: found redistributor 11003 region 1:0x0000000441860000
[   26.959445] GICv3: CPU240: using allocated LPI pending table @0x0000000881800000
[   26.959513] CPU240: Booted secondary processor 0x0000011003 [0x431f0af1]
[   26.959656] Bringup CPU240 0
[   26.959660] Bringup CPU241 left 15
[   27.114861] Detected PIPT I-cache on CPU241
[   27.115049] GICv3: CPU241: found redistributor 11103 region 1:0x00000004418e0000
[   27.115061] GICv3: CPU241: using allocated LPI pending table @0x0000000881810000
[   27.115127] CPU241: Booted secondary processor 0x0000011103 [0x431f0af1]
[   27.115278] Bringup CPU241 0
[   27.115281] Bringup CPU242 left 14
[   27.270560] Detected PIPT I-cache on CPU242
[   27.270753] GICv3: CPU242: found redistributor 11203 region 1:0x0000000441960000
[   27.270767] GICv3: CPU242: using allocated LPI pending table @0x0000000881820000
[   27.270834] CPU242: Booted secondary processor 0x0000011203 [0x431f0af1]
[   27.270980] Bringup CPU242 0
[   27.270983] Bringup CPU243 left 13
[   27.426107] Detected PIPT I-cache on CPU243
[   27.426299] GICv3: CPU243: found redistributor 11303 region 1:0x00000004419e0000
[   27.426312] GICv3: CPU243: using allocated LPI pending table @0x0000000881830000
[   27.426380] CPU243: Booted secondary processor 0x0000011303 [0x431f0af1]
[   27.426525] Bringup CPU243 0
[   27.426529] Bringup CPU244 left 12
[   27.581504] Detected PIPT I-cache on CPU244
[   27.581699] GICv3: CPU244: found redistributor 11403 region 1:0x0000000441a60000
[   27.581713] GICv3: CPU244: using allocated LPI pending table @0x0000000881840000
[   27.581781] CPU244: Booted secondary processor 0x0000011403 [0x431f0af1]
[   27.582055] Bringup CPU244 0
[   27.582059] Bringup CPU245 left 11
[   27.737400] Detected PIPT I-cache on CPU245
[   27.737596] GICv3: CPU245: found redistributor 11503 region 1:0x0000000441ae0000
[   27.737608] GICv3: CPU245: using allocated LPI pending table @0x0000000881850000
[   27.737676] CPU245: Booted secondary processor 0x0000011503 [0x431f0af1]
[   27.737830] Bringup CPU245 0
[   27.737833] Bringup CPU246 left 10
[   27.892579] Detected PIPT I-cache on CPU246
[   27.892778] GICv3: CPU246: found redistributor 11603 region 1:0x0000000441b60000
[   27.892791] GICv3: CPU246: using allocated LPI pending table @0x0000000881860000
[   27.892857] CPU246: Booted secondary processor 0x0000011603 [0x431f0af1]
[   27.893004] Bringup CPU246 0
[   27.893008] Bringup CPU247 left 9
[   28.047725] Detected PIPT I-cache on CPU247
[   28.047925] GICv3: CPU247: found redistributor 11703 region 1:0x0000000441be0000
[   28.047938] GICv3: CPU247: using allocated LPI pending table @0x0000000881870000
[   28.048008] CPU247: Booted secondary processor 0x0000011703 [0x431f0af1]
[   28.048159] Bringup CPU247 0
[   28.048163] Bringup CPU248 left 8
[   28.203549] Detected PIPT I-cache on CPU248
[   28.203752] GICv3: CPU248: found redistributor 11803 region 1:0x0000000441c60000
[   28.203765] GICv3: CPU248: using allocated LPI pending table @0x0000000881880000
[   28.203831] CPU248: Booted secondary processor 0x0000011803 [0x431f0af1]
[   28.203976] Bringup CPU248 0
[   28.203980] Bringup CPU249 left 7
[   28.359259] Detected PIPT I-cache on CPU249
[   28.359465] GICv3: CPU249: found redistributor 11903 region 1:0x0000000441ce0000
[   28.359478] GICv3: CPU249: using allocated LPI pending table @0x0000000881890000
[   28.359546] CPU249: Booted secondary processor 0x0000011903 [0x431f0af1]
[   28.359696] Bringup CPU249 0
[   28.359699] Bringup CPU250 left 6
[   28.514990] Detected PIPT I-cache on CPU250
[   28.515196] GICv3: CPU250: found redistributor 11a03 region 1:0x0000000441d60000
[   28.515209] GICv3: CPU250: using allocated LPI pending table @0x00000008818a0000
[   28.515278] CPU250: Booted secondary processor 0x0000011a03 [0x431f0af1]
[   28.515427] Bringup CPU250 0
[   28.515431] Bringup CPU251 left 5
[   28.670202] Detected PIPT I-cache on CPU251
[   28.670410] GICv3: CPU251: found redistributor 11b03 region 1:0x0000000441de0000
[   28.670424] GICv3: CPU251: using allocated LPI pending table @0x00000008818b0000
[   28.670490] CPU251: Booted secondary processor 0x0000011b03 [0x431f0af1]
[   28.670764] Bringup CPU251 0
[   28.670767] Bringup CPU252 left 4
[   28.826271] Detected PIPT I-cache on CPU252
[   28.826482] GICv3: CPU252: found redistributor 11c03 region 1:0x0000000441e60000
[   28.826495] GICv3: CPU252: using allocated LPI pending table @0x00000008818c0000
[   28.826564] CPU252: Booted secondary processor 0x0000011c03 [0x431f0af1]
[   28.826721] Bringup CPU252 0
[   28.826725] Bringup CPU253 left 3
[   28.981779] Detected PIPT I-cache on CPU253
[   28.981990] GICv3: CPU253: found redistributor 11d03 region 1:0x0000000441ee0000
[   28.982003] GICv3: CPU253: using allocated LPI pending table @0x00000008818d0000
[   28.982071] CPU253: Booted secondary processor 0x0000011d03 [0x431f0af1]
[   28.982224] Bringup CPU253 0
[   28.982227] Bringup CPU254 left 2
[   29.137440] Detected PIPT I-cache on CPU254
[   29.137658] GICv3: CPU254: found redistributor 11e03 region 1:0x0000000441f60000
[   29.137671] GICv3: CPU254: using allocated LPI pending table @0x00000008818e0000
[   29.137737] CPU254: Booted secondary processor 0x0000011e03 [0x431f0af1]
[   29.137895] Bringup CPU254 0
[   29.137899] Bringup CPU255 left 1
[   29.137901] smp: Brought up 2 nodes, 255 CPUs
[   29.137904] SMP: Total of 255 processors activated.
Re: [patch V4 33/37] cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE
Posted by Thomas Gleixner 11 months, 2 weeks ago
On Mon, May 22 2023 at 23:27, Mark Brown wrote:
> On Mon, May 22, 2023 at 11:04:17PM +0200, Thomas Gleixner wrote:
>
>> That does not make any sense at all and my tired brain does not help
>> either.
>
>> Can you please apply the below debug patch and provide the output?
>
> Here's the log, a quick glance says the 
>
> 	if (!--ncpus)
> 		break;
>
> check is doing the wrong thing

Obviously.

Let me find a brown paperbag and go to sleep before I even try to
compile the obvious fix.

---
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 005f863a3d2b..88a7ede322bd 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1770,9 +1770,6 @@ static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int n
 	for_each_cpu(cpu, mask) {
 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
-		if (!--ncpus)
-			break;
-
 		if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
 			/*
 			 * If this failed then cpu_up() might have only
@@ -1781,6 +1778,9 @@ static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int n
 			 */
 			WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
 		}
+
+		if (!--ncpus)
+			break;
 	}
 }
Re: [patch V4 33/37] cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE
Posted by Mark Brown 11 months, 2 weeks ago
On Tue, May 23, 2023 at 01:12:26AM +0200, Thomas Gleixner wrote:

> Let me find a brown paperbag and go to sleep before I even try to
> compile the obvious fix.

That fixes the problem on TX2 - thanks!

Tested-by: Mark Brown <broonie@kernel.org>
[patch V4 34/37] x86/apic: Save the APIC virtual base address
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

For parallel CPU bringup it's required to read the APIC ID in the low level
startup code. The virtual APIC base address is a constant because it's a
fix-mapped address. Exposing that constant which is composed via macros to
assembly code is non-trivial due to header inclusion hell.

Aside of that it's constant only because of the vsyscall ABI
requirement. Once vsyscall is out of the picture the fixmap can be placed
at runtime.

Avoid header hell, stay flexible and store the address in a variable which
can be exposed to the low level startup code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
V4: Fixed changelog typo - Sergey
---
 arch/x86/include/asm/smp.h  |    1 +
 arch/x86/kernel/apic/apic.c |    4 ++++
 2 files changed, 5 insertions(+)
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -196,6 +196,7 @@ extern void nmi_selftest(void);
 #endif
 
 extern unsigned int smpboot_control;
+extern unsigned long apic_mmio_base;
 
 #endif /* !__ASSEMBLY__ */
 
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -101,6 +101,9 @@ static int apic_extnmi __ro_after_init =
  */
 static bool virt_ext_dest_id __ro_after_init;
 
+/* For parallel bootup. */
+unsigned long apic_mmio_base __ro_after_init;
+
 /*
  * Map cpu index to physical APIC ID
  */
@@ -2163,6 +2166,7 @@ void __init register_lapic_address(unsig
 
 	if (!x2apic_mode) {
 		set_fixmap_nocache(FIX_APIC_BASE, address);
+		apic_mmio_base = APIC_BASE;
 		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
 			    APIC_BASE, address);
 	}
[patch V4 35/37] x86/smpboot: Implement a bit spinlock to protect the realmode stack
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Parallel AP bringup requires that the APs can run fully parallel through
the early startup code including the real mode trampoline.

To prepare for this implement a bit-spinlock to serialize access to the
real mode stack so that parallel upcoming APs are not going to corrupt each
others stack while going through the real mode startup code.

Co-developed-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
V4: Simplify the lock implementation - Peter Z.
---
 arch/x86/include/asm/realmode.h      |    3 +++
 arch/x86/kernel/head_64.S            |   12 ++++++++++++
 arch/x86/realmode/init.c             |    3 +++
 arch/x86/realmode/rm/trampoline_64.S |   23 ++++++++++++++++++-----
 4 files changed, 36 insertions(+), 5 deletions(-)
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -52,6 +52,7 @@ struct trampoline_header {
 	u64 efer;
 	u32 cr4;
 	u32 flags;
+	u32 lock;
 #endif
 };
 
@@ -64,6 +65,8 @@ extern unsigned long initial_stack;
 extern unsigned long initial_vc_handler;
 #endif
 
+extern u32 *trampoline_lock;
+
 extern unsigned char real_mode_blob[];
 extern unsigned char real_mode_relocs[];
 
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -252,6 +252,16 @@ SYM_INNER_LABEL(secondary_startup_64_no_
 	movq	TASK_threadsp(%rax), %rsp
 
 	/*
+	 * Now that this CPU is running on its own stack, drop the realmode
+	 * protection. For the boot CPU the pointer is NULL!
+	 */
+	movq	trampoline_lock(%rip), %rax
+	testq	%rax, %rax
+	jz	.Lsetup_gdt
+	movl	$0, (%rax)
+
+.Lsetup_gdt:
+	/*
 	 * We must switch to a new descriptor in kernel space for the GDT
 	 * because soon the kernel won't have access anymore to the userspace
 	 * addresses where we're currently running on. We have to do that here
@@ -433,6 +443,8 @@ SYM_DATA(initial_code,	.quad x86_64_star
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 SYM_DATA(initial_vc_handler,	.quad handle_vc_boot_ghcb)
 #endif
+
+SYM_DATA(trampoline_lock, .quad 0);
 	__FINITDATA
 
 	__INIT
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -154,6 +154,9 @@ static void __init setup_real_mode(void)
 
 	trampoline_header->flags = 0;
 
+	trampoline_lock = &trampoline_header->lock;
+	*trampoline_lock = 0;
+
 	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
 
 	/* Map the real mode stub as virtual == physical */
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -37,6 +37,20 @@
 	.text
 	.code16
 
+.macro LOAD_REALMODE_ESP
+	/*
+	 * Make sure only one CPU fiddles with the realmode stack
+	*/
+.Llock_rm\@:
+        lock btsl       $0, tr_lock
+        jnc             2f
+        pause
+        jmp             .Llock_rm\@
+2:
+	# Setup stack
+	movl	$rm_stack_end, %esp
+.endm
+
 	.balign	PAGE_SIZE
 SYM_CODE_START(trampoline_start)
 	cli			# We should be safe anyway
@@ -49,8 +63,7 @@ SYM_CODE_START(trampoline_start)
 	mov	%ax, %es
 	mov	%ax, %ss
 
-	# Setup stack
-	movl	$rm_stack_end, %esp
+	LOAD_REALMODE_ESP
 
 	call	verify_cpu		# Verify the cpu supports long mode
 	testl   %eax, %eax		# Check for return code
@@ -93,8 +106,7 @@ SYM_CODE_START(sev_es_trampoline_start)
 	mov	%ax, %es
 	mov	%ax, %ss
 
-	# Setup stack
-	movl	$rm_stack_end, %esp
+	LOAD_REALMODE_ESP
 
 	jmp	.Lswitch_to_protected
 SYM_CODE_END(sev_es_trampoline_start)
@@ -177,7 +189,7 @@ SYM_CODE_START(pa_trampoline_compat)
 	 * In compatibility mode.  Prep ESP and DX for startup_32, then disable
 	 * paging and complete the switch to legacy 32-bit mode.
 	 */
-	movl	$rm_stack_end, %esp
+	LOAD_REALMODE_ESP
 	movw	$__KERNEL_DS, %dx
 
 	movl	$(CR0_STATE & ~X86_CR0_PG), %eax
@@ -241,6 +253,7 @@ SYM_DATA_START(trampoline_header)
 	SYM_DATA(tr_efer,		.space 8)
 	SYM_DATA(tr_cr4,		.space 4)
 	SYM_DATA(tr_flags,		.space 4)
+	SYM_DATA(tr_lock,		.space 4)
 SYM_DATA_END(trampoline_header)
 
 #include "trampoline_common.S"
[patch V4 36/37] x86/smpboot: Support parallel startup of secondary CPUs
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: David Woodhouse <dwmw@amazon.co.uk>

In parallel startup mode the APs are kicked alive by the control CPU
quickly after each other and run through the early startup code in
parallel. The real-mode startup code is already serialized with a
bit-spinlock to protect the real-mode stack.

In parallel startup mode the smpboot_control variable obviously cannot
contain the Linux CPU number so the APs have to determine their Linux CPU
number on their own. This is required to find the CPUs per CPU offset in
order to find the idle task stack and other per CPU data.

To achieve this, export the cpuid_to_apicid[] array so that each AP can
find its own CPU number by searching therein based on its APIC ID.

Introduce a flag in the top bits of smpboot_control which indicates that
the AP should find its CPU number by reading the APIC ID from the APIC.

This is required because CPUID based APIC ID retrieval can only provide the
initial APIC ID, which might have been overruled by the firmware. Some AMD
APUs come up with APIC ID = initial APIC ID + 0x10, so the APIC ID to CPU
number lookup would fail miserably if based on CPUID. Also virtualization
can make its own APIC ID assignments. The only requirement is that the
APIC IDs are consistent with the ACPI/MADT table.

For the boot CPU or in case parallel bringup is disabled the control bits
are empty and the CPU number is directly available in bit 0-23 of
smpboot_control.

[ tglx: Initial proof of concept patch with bitlock and APIC ID lookup ]
[ dwmw2: Rework and testing, commit message, CPUID 0x1 and CPU0 support ]
[ seanc: Fix stray override of initial_gs in common_cpu_up() ]
[ Oleksandr Natalenko: reported suspend/resume issue fixed in
  x86_acpi_suspend_lowlevel ]
[ tglx: Make it read the APIC ID from the APIC instead of using CPUID,
  	split the bitlock part out ]

Co-developed-by: Thomas Gleixner <tglx@linutronix.de>
Co-developed-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
V4: Remove the lock prefix in the error path - Peter Z.
---
 arch/x86/include/asm/apic.h    |    2 +
 arch/x86/include/asm/apicdef.h |    5 ++-
 arch/x86/include/asm/smp.h     |    6 ++++
 arch/x86/kernel/acpi/sleep.c   |    9 +++++-
 arch/x86/kernel/apic/apic.c    |    2 -
 arch/x86/kernel/head_64.S      |   61 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/smpboot.c      |    2 -
 7 files changed, 83 insertions(+), 4 deletions(-)
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -55,6 +55,8 @@ extern int local_apic_timer_c2_ok;
 extern int disable_apic;
 extern unsigned int lapic_timer_period;
 
+extern int cpuid_to_apicid[];
+
 extern enum apic_intr_mode_id apic_intr_mode;
 enum apic_intr_mode_id {
 	APIC_PIC,
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -138,7 +138,8 @@
 #define		APIC_EILVT_MASKED	(1 << 16)
 
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-#define APIC_BASE_MSR	0x800
+#define APIC_BASE_MSR		0x800
+#define APIC_X2APIC_ID_MSR	0x802
 #define XAPIC_ENABLE	(1UL << 11)
 #define X2APIC_ENABLE	(1UL << 10)
 
@@ -162,6 +163,7 @@
 #define APIC_CPUID(apicid)	((apicid) & XAPIC_DEST_CPUS_MASK)
 #define NUM_APIC_CLUSTERS	((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT)
 
+#ifndef __ASSEMBLY__
 /*
  * the local APIC register structure, memory mapped. Not terribly well
  * tested, but we might eventually use this one in the future - the
@@ -435,4 +437,5 @@ enum apic_delivery_modes {
 	APIC_DELIVERY_MODE_EXTINT	= 7,
 };
 
+#endif /* !__ASSEMBLY__ */
 #endif /* _ASM_X86_APICDEF_H */
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -200,4 +200,10 @@ extern unsigned long apic_mmio_base;
 
 #endif /* !__ASSEMBLY__ */
 
+/* Control bits for startup_64 */
+#define STARTUP_READ_APICID	0x80000000
+
+/* Top 8 bits are reserved for control */
+#define STARTUP_PARALLEL_MASK	0xFF000000
+
 #endif /* _ASM_X86_SMP_H */
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -16,6 +16,7 @@
 #include <asm/cacheflush.h>
 #include <asm/realmode.h>
 #include <asm/hypervisor.h>
+#include <asm/smp.h>
 
 #include <linux/ftrace.h>
 #include "../../realmode/rm/wakeup.h"
@@ -127,7 +128,13 @@ int x86_acpi_suspend_lowlevel(void)
 	 * value is in the actual %rsp register.
 	 */
 	current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
-	smpboot_control = smp_processor_id();
+	/*
+	 * Ensure the CPU knows which one it is when it comes back, if
+	 * it isn't in parallel mode and expected to work that out for
+	 * itself.
+	 */
+	if (!(smpboot_control & STARTUP_PARALLEL_MASK))
+		smpboot_control = smp_processor_id();
 #endif
 	initial_code = (unsigned long)wakeup_long64;
 	saved_magic = 0x123456789abcdef0L;
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2380,7 +2380,7 @@ static int nr_logical_cpuids = 1;
 /*
  * Used to store mapping between logical CPU IDs and APIC IDs.
  */
-static int cpuid_to_apicid[] = {
+int cpuid_to_apicid[] = {
 	[0 ... NR_CPUS - 1] = -1,
 };
 
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -24,7 +24,9 @@
 #include "../entry/calling.h"
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
+#include <asm/apicdef.h>
 #include <asm/fixmap.h>
+#include <asm/smp.h>
 
 /*
  * We are not able to switch in one step to the final KERNEL ADDRESS SPACE
@@ -234,8 +236,67 @@ SYM_INNER_LABEL(secondary_startup_64_no_
 	ANNOTATE_NOENDBR // above
 
 #ifdef CONFIG_SMP
+	/*
+	 * For parallel boot, the APIC ID is read from the APIC, and then
+	 * used to look up the CPU number.  For booting a single CPU, the
+	 * CPU number is encoded in smpboot_control.
+	 *
+	 * Bit 31	STARTUP_READ_APICID (Read APICID from APIC)
+	 * Bit 0-23	CPU# if STARTUP_xx flags are not set
+	 */
 	movl	smpboot_control(%rip), %ecx
+	testl	$STARTUP_READ_APICID, %ecx
+	jnz	.Lread_apicid
+	/*
+	 * No control bit set, single CPU bringup. CPU number is provided
+	 * in bit 0-23. This is also the boot CPU case (CPU number 0).
+	 */
+	andl	$(~STARTUP_PARALLEL_MASK), %ecx
+	jmp	.Lsetup_cpu
+
+.Lread_apicid:
+	/* Check whether X2APIC mode is already enabled */
+	mov	$MSR_IA32_APICBASE, %ecx
+	rdmsr
+	testl	$X2APIC_ENABLE, %eax
+	jnz	.Lread_apicid_msr
+
+	/* Read the APIC ID from the fix-mapped MMIO space. */
+	movq	apic_mmio_base(%rip), %rcx
+	addq	$APIC_ID, %rcx
+	movl	(%rcx), %eax
+	shr	$24, %eax
+	jmp	.Llookup_AP
+
+.Lread_apicid_msr:
+	mov	$APIC_X2APIC_ID_MSR, %ecx
+	rdmsr
+
+.Llookup_AP:
+	/* EAX contains the APIC ID of the current CPU */
+	xorq	%rcx, %rcx
+	leaq	cpuid_to_apicid(%rip), %rbx
+
+.Lfind_cpunr:
+	cmpl	(%rbx,%rcx,4), %eax
+	jz	.Lsetup_cpu
+	inc	%ecx
+#ifdef CONFIG_FORCE_NR_CPUS
+	cmpl	$NR_CPUS, %ecx
+#else
+	cmpl	nr_cpu_ids(%rip), %ecx
+#endif
+	jb	.Lfind_cpunr
+
+	/*  APIC ID not found in the table. Drop the trampoline lock and bail. */
+	movq	trampoline_lock(%rip), %rax
+	movl	$0, (%rax)
+
+1:	cli
+	hlt
+	jmp	1b
 
+.Lsetup_cpu:
 	/* Get the per cpu offset for the given CPU# which is in ECX */
 	movq	__per_cpu_offset(,%rcx,8), %rdx
 #else
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -996,7 +996,7 @@ static int do_boot_cpu(int apicid, int c
 	if (IS_ENABLED(CONFIG_X86_32)) {
 		early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
 		initial_stack  = idle->thread.sp;
-	} else {
+	} else if (!(smpboot_control & STARTUP_PARALLEL_MASK)) {
 		smpboot_control = cpu;
 	}
Re: [patch V4 36/37] x86/smpboot: Support parallel startup of secondary CPUs
Posted by Jeffrey Hugo 11 months, 3 weeks ago
On 5/12/2023 3:07 PM, Thomas Gleixner wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> In parallel startup mode the APs are kicked alive by the control CPU
> quickly after each other and run through the early startup code in
> parallel. The real-mode startup code is already serialized with a
> bit-spinlock to protect the real-mode stack.
> 
> In parallel startup mode the smpboot_control variable obviously cannot
> contain the Linux CPU number so the APs have to determine their Linux CPU
> number on their own. This is required to find the CPUs per CPU offset in
> order to find the idle task stack and other per CPU data.
> 
> To achieve this, export the cpuid_to_apicid[] array so that each AP can
> find its own CPU number by searching therein based on its APIC ID.
> 
> Introduce a flag in the top bits of smpboot_control which indicates that
> the AP should find its CPU number by reading the APIC ID from the APIC.
> 
> This is required because CPUID based APIC ID retrieval can only provide the
> initial APIC ID, which might have been overruled by the firmware. Some AMD
> APUs come up with APIC ID = initial APIC ID + 0x10, so the APIC ID to CPU
> number lookup would fail miserably if based on CPUID. Also virtualization
> can make its own APIC ID assignements. The only requirement is that the
> APIC IDs are consistent with the APCI/MADT table.
> 
> For the boot CPU or in case parallel bringup is disabled the control bits
> are empty and the CPU number is directly available in bit 0-23 of
> smpboot_control.
> 
> [ tglx: Initial proof of concept patch with bitlock and APIC ID lookup ]
> [ dwmw2: Rework and testing, commit message, CPUID 0x1 and CPU0 support ]
> [ seanc: Fix stray override of initial_gs in common_cpu_up() ]
> [ Oleksandr Natalenko: reported suspend/resume issue fixed in
>    x86_acpi_suspend_lowlevel ]
> [ tglx: Make it read the APIC ID from the APIC instead of using CPUID,
>    	split the bitlock part out ]
> 
> Co-developed-by: Thomas Gleixner <tglx@linutronix.de>
> Co-developed-by: Brian Gerst <brgerst@gmail.com>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: Michael Kelley <mikelley@microsoft.com>
> ---

I pulled in this change via the next tree, tag next-20230519 and I get a 
build failure using the x86_64_defconfig -

   DESCEND objtool
   INSTALL libsubcmd_headers
   CALL    scripts/checksyscalls.sh
   AS      arch/x86/kernel/head_64.o
arch/x86/kernel/head_64.S: Assembler messages:
arch/x86/kernel/head_64.S:261: Error: missing ')'
arch/x86/kernel/head_64.S:261: Error: junk `UL<<10)' after expression
   CC      arch/x86/kernel/head64.o
   CC      arch/x86/kernel/ebda.o
   CC      arch/x86/kernel/platform-quirks.o
scripts/Makefile.build:374: recipe for target 
'arch/x86/kernel/head_64.o' failed
make[3]: *** [arch/x86/kernel/head_64.o] Error 1
make[3]: *** Waiting for unfinished jobs....
scripts/Makefile.build:494: recipe for target 'arch/x86/kernel' failed
make[2]: *** [arch/x86/kernel] Error 2
scripts/Makefile.build:494: recipe for target 'arch/x86' failed
make[1]: *** [arch/x86] Error 2
make[1]: *** Waiting for unfinished jobs....
Makefile:2026: recipe for target '.' failed
make: *** [.] Error 2

This is with GCC 5.4.0, if it matters.

Reverting this change allows the build to move forward, although I also 
need to revert "x86/smpboot/64: Implement 
arch_cpuhp_init_parallel_bringup() and enable it" for the build to fully 
succeed.

I'm not familiar with this code, and nothing obvious stands out to me. 
What can I do to help root cause this?

-Jeff
Re: [patch V4 36/37] x86/smpboot: Support parallel startup of secondary CPUs
Posted by Andrew Cooper 11 months, 3 weeks ago
On 19/05/2023 5:28 pm, Jeffrey Hugo wrote:
>   DESCEND objtool
>   INSTALL libsubcmd_headers
>   CALL    scripts/checksyscalls.sh
>   AS      arch/x86/kernel/head_64.o
> arch/x86/kernel/head_64.S: Assembler messages:
> arch/x86/kernel/head_64.S:261: Error: missing ')'
> arch/x86/kernel/head_64.S:261: Error: junk `UL<<10)' after expression
>   CC      arch/x86/kernel/head64.o
>   CC      arch/x86/kernel/ebda.o
>   CC      arch/x86/kernel/platform-quirks.o
> scripts/Makefile.build:374: recipe for target
> 'arch/x86/kernel/head_64.o' failed
> make[3]: *** [arch/x86/kernel/head_64.o] Error 1
> make[3]: *** Waiting for unfinished jobs....
> scripts/Makefile.build:494: recipe for target 'arch/x86/kernel' failed
> make[2]: *** [arch/x86/kernel] Error 2
> scripts/Makefile.build:494: recipe for target 'arch/x86' failed
> make[1]: *** [arch/x86] Error 2
> make[1]: *** Waiting for unfinished jobs....
> Makefile:2026: recipe for target '.' failed
> make: *** [.] Error 2
>
> This is with GCC 5.4.0, if it matters.
>
> Reverting this change allows the build to move forward, although I
> also need to revert "x86/smpboot/64: Implement
> arch_cpuhp_init_parallel_bringup() and enable it" for the build to
> fully succeed.
>
> I'm not familiar with this code, and nothing obvious stands out to me.
> What can I do to help root cause this?

Can you try:

-#define XAPIC_ENABLE    (1UL << 11)
-#define X2APIC_ENABLE    (1UL << 10)
+#define XAPIC_ENABLE    BIT(11)
+#define X2APIC_ENABLE    BIT(10)

The UL suffix isn't understood by older binutils, and this patch adds
the first use of these constants in assembly.

~Andrew

Re: [patch V4 36/37] x86/smpboot: Support parallel startup of secondary CPUs
Posted by Jeffrey Hugo 11 months, 3 weeks ago
On 5/19/2023 10:57 AM, Andrew Cooper wrote:
> On 19/05/2023 5:28 pm, Jeffrey Hugo wrote:
>>    DESCEND objtool
>>    INSTALL libsubcmd_headers
>>    CALL    scripts/checksyscalls.sh
>>    AS      arch/x86/kernel/head_64.o
>> arch/x86/kernel/head_64.S: Assembler messages:
>> arch/x86/kernel/head_64.S:261: Error: missing ')'
>> arch/x86/kernel/head_64.S:261: Error: junk `UL<<10)' after expression
>>    CC      arch/x86/kernel/head64.o
>>    CC      arch/x86/kernel/ebda.o
>>    CC      arch/x86/kernel/platform-quirks.o
>> scripts/Makefile.build:374: recipe for target
>> 'arch/x86/kernel/head_64.o' failed
>> make[3]: *** [arch/x86/kernel/head_64.o] Error 1
>> make[3]: *** Waiting for unfinished jobs....
>> scripts/Makefile.build:494: recipe for target 'arch/x86/kernel' failed
>> make[2]: *** [arch/x86/kernel] Error 2
>> scripts/Makefile.build:494: recipe for target 'arch/x86' failed
>> make[1]: *** [arch/x86] Error 2
>> make[1]: *** Waiting for unfinished jobs....
>> Makefile:2026: recipe for target '.' failed
>> make: *** [.] Error 2
>>
>> This is with GCC 5.4.0, if it matters.
>>
>> Reverting this change allows the build to move forward, although I
>> also need to revert "x86/smpboot/64: Implement
>> arch_cpuhp_init_parallel_bringup() and enable it" for the build to
>> fully succeed.
>>
>> I'm not familiar with this code, and nothing obvious stands out to me.
>> What can I do to help root cause this?
> 
> Can you try:
> 
> -#define XAPIC_ENABLE    (1UL << 11)
> -#define X2APIC_ENABLE    (1UL << 10)
> +#define XAPIC_ENABLE    BIT(11)
> +#define X2APIC_ENABLE    BIT(10)
> 
> The UL suffix isn't understood by older binutils, and this patch adds
> the first use of these constants in assembly.

Ah, makes sense.

Your suggested change works for me.  No more compile error.

I assume you will be following up with a patch to address this.  Feel 
free to add the following tags as you see fit:

Reported-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Tested-by: Jeffrey Hugo <quic_jhugo@quicinc.com>

-Jeff
[patch V4 37/37] x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it
Posted by Thomas Gleixner 11 months, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

Implement the validation function which tells the core code whether
parallel bringup is possible.

The only condition for now is that the kernel does not run in an encrypted
guest as these will trap the RDMSR via #VC, which cannot be handled at that
point in early startup.

There was an earlier variant for AMD-SEV which used the GHCB protocol for
retrieving the APIC ID via CPUID, but there is no guarantee that the
initial APIC ID in CPUID is the same as the real APIC ID. There is no
enforcement from the secure firmware and the hypervisor can assign APIC IDs
as it sees fit as long as the ACPI/MADT table is consistent with that
assignment.

Unfortunately there is no RDMSR GHCB protocol at the moment, so enabling
AMD-SEV guests for parallel startup needs some more thought.

Intel-TDX provides a secure RDMSR hypercall, but supporting that is outside
the scope of this change.

Fixup announce_cpu() as e.g. on Hyper-V CPU1 is the secondary sibling of
CPU0, which makes the @cpu == 1 logic in announce_cpu() fall apart.

[ mikelley: Reported the announce_cpu() fallout ]

Originally-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
---
V2: Fixup announce_cpu() - Michael Kelley
V3: Fixup announce_cpu() for real - Michael Kelley
---
 arch/x86/Kconfig             |    3 -
 arch/x86/kernel/cpu/common.c |    6 --
 arch/x86/kernel/smpboot.c    |   87 +++++++++++++++++++++++++++++++++++--------
 3 files changed, 75 insertions(+), 21 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -274,8 +274,9 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_USER_RETURN_NOTIFIER
 	select HAVE_GENERIC_VDSO
+	select HOTPLUG_PARALLEL			if SMP && X86_64
 	select HOTPLUG_SMT			if SMP
-	select HOTPLUG_SPLIT_STARTUP		if SMP
+	select HOTPLUG_SPLIT_STARTUP		if SMP && X86_32
 	select IRQ_FORCED_THREADING
 	select NEED_PER_CPU_EMBED_FIRST_CHUNK
 	select NEED_PER_CPU_PAGE_FIRST_CHUNK
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2128,11 +2128,7 @@ static inline void setup_getcpu(int cpu)
 }
 
 #ifdef CONFIG_X86_64
-static inline void ucode_cpu_init(int cpu)
-{
-	if (cpu)
-		load_ucode_ap();
-}
+static inline void ucode_cpu_init(int cpu) { }
 
 static inline void tss_setup_ist(struct tss_struct *tss)
 {
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -58,6 +58,7 @@
 #include <linux/overflow.h>
 #include <linux/stackprotector.h>
 #include <linux/cpuhotplug.h>
+#include <linux/mc146818rtc.h>
 
 #include <asm/acpi.h>
 #include <asm/cacheinfo.h>
@@ -75,7 +76,7 @@
 #include <asm/fpu/api.h>
 #include <asm/setup.h>
 #include <asm/uv/uv.h>
-#include <linux/mc146818rtc.h>
+#include <asm/microcode.h>
 #include <asm/i8259.h>
 #include <asm/misc.h>
 #include <asm/qspinlock.h>
@@ -128,7 +129,6 @@ int arch_update_cpu_topology(void)
 	return retval;
 }
 
-
 static unsigned int smpboot_warm_reset_vector_count;
 
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
@@ -226,16 +226,43 @@ static void notrace start_secondary(void
 	 */
 	cr4_init();
 
-#ifdef CONFIG_X86_32
-	/* switch away from the initial page table */
-	load_cr3(swapper_pg_dir);
-	__flush_tlb_all();
-#endif
+	/*
+	 * 32-bit specific. 64-bit reaches this code with the correct page
+	 * table established. Yet another historical divergence.
+	 */
+	if (IS_ENABLED(CONFIG_X86_32)) {
+		/* switch away from the initial page table */
+		load_cr3(swapper_pg_dir);
+		__flush_tlb_all();
+	}
+
 	cpu_init_exception_handling();
 
 	/*
-	 * Synchronization point with the hotplug core. Sets the
-	 * synchronization state to ALIVE and waits for the control CPU to
+	 * 32-bit systems load the microcode from the ASM startup code for
+	 * historical reasons.
+	 *
+	 * On 64-bit systems load it before reaching the AP alive
+	 * synchronization point below so it is not part of the full per
+	 * CPU serialized bringup part when "parallel" bringup is enabled.
+	 *
+	 * That's even safe when hyperthreading is enabled in the CPU as
+	 * the core code starts the primary threads first and leaves the
+	 * secondary threads waiting for SIPI. Loading microcode on
+	 * physical cores concurrently is a safe operation.
+	 *
+	 * This covers both the Intel specific issue that concurrent
+	 * microcode loading on SMT siblings must be prohibited and the
 +	 * vendor independent issue that microcode loading which changes
+	 * CPUID, MSRs etc. must be strictly serialized to maintain
+	 * software state correctness.
+	 */
+	if (IS_ENABLED(CONFIG_X86_64))
+		load_ucode_ap();
+
+	/*
+	 * Synchronization point with the hotplug core. Sets this CPUs
+	 * synchronization state to ALIVE and spin-waits for the control CPU to
 	 * release this CPU for further bringup.
 	 */
 	cpuhp_ap_sync_alive();
@@ -918,9 +945,9 @@ static int wakeup_secondary_cpu_via_init
 /* reduce the number of lines printed when booting a large cpu count system */
 static void announce_cpu(int cpu, int apicid)
 {
+	static int width, node_width, first = 1;
 	static int current_node = NUMA_NO_NODE;
 	int node = early_cpu_to_node(cpu);
-	static int width, node_width;
 
 	if (!width)
 		width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */
@@ -928,10 +955,10 @@ static void announce_cpu(int cpu, int ap
 	if (!node_width)
 		node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */
 
-	if (cpu == 1)
-		printk(KERN_INFO "x86: Booting SMP configuration:\n");
-
 	if (system_state < SYSTEM_RUNNING) {
+		if (first)
+			pr_info("x86: Booting SMP configuration:\n");
+
 		if (node != current_node) {
 			if (current_node > (-1))
 				pr_cont("\n");
@@ -942,11 +969,11 @@ static void announce_cpu(int cpu, int ap
 		}
 
 		/* Add padding for the BSP */
-		if (cpu == 1)
+		if (first)
 			pr_cont("%*s", width + 1, " ");
+		first = 0;
 
 		pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);
-
 	} else
 		pr_info("Booting Node %d Processor %d APIC 0x%x\n",
 			node, cpu, apicid);
@@ -1236,6 +1263,36 @@ void __init smp_prepare_cpus_common(void
 	set_cpu_sibling_map(0);
 }
 
+#ifdef CONFIG_X86_64
+/* Establish whether parallel bringup can be supported. */
+bool __init arch_cpuhp_init_parallel_bringup(void)
+{
+	/*
+	 * Encrypted guests require special handling. They enforce X2APIC
+	 * mode but the RDMSR to read the APIC ID is intercepted and raises
+	 * #VC or #VE which cannot be handled in the early startup code.
+	 *
+	 * AMD-SEV does not provide a RDMSR GHCB protocol so the early
+	 * startup code cannot directly communicate with the secure
+	 * firmware. The alternative solution to retrieve the APIC ID via
+	 * CPUID(0xb), which is covered by the GHCB protocol, is not viable
+	 * either because there is no enforcement of the CPUID(0xb)
+	 * provided "initial" APIC ID to be the same as the real APIC ID.
+	 *
+	 * Intel-TDX has a secure RDMSR hypercall, but that needs to be
 +	 * implemented separately in the low level startup ASM code.
+	 */
+	if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
+		pr_info("Parallel CPU startup disabled due to guest state encryption\n");
+		return false;
+	}
+
+	smpboot_control = STARTUP_READ_APICID;
+	pr_debug("Parallel CPU startup enabled: 0x%08x\n", smpboot_control);
+	return true;
+}
+#endif
+
 /*
  * Prepare for SMP bootup.
  * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
Re: [patch V4 37/37] x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it
Posted by Peter Zijlstra 11 months, 3 weeks ago
On Fri, May 12, 2023 at 11:07:56PM +0200, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> Implement the validation function which tells the core code whether
> parallel bringup is possible.
> 
> The only condition for now is that the kernel does not run in an encrypted
> guest as these will trap the RDMSR via #VC, which cannot be handled at that
> point in early startup.
> 
> There was an earlier variant for AMD-SEV which used the GHBC protocol for
> retrieving the APIC ID via CPUID, but there is no guarantee that the
> initial APIC ID in CPUID is the same as the real APIC ID. There is no
> enforcement from the secure firmware and the hypervisor can assign APIC IDs
> as it sees fit as long as the ACPI/MADT table is consistent with that
> assignment.
> 
> Unfortunately there is no RDMSR GHCB protocol at the moment, so enabling
> AMD-SEV guests for parallel startup needs some more thought.

One option, other than adding said protocol, would be to:

 - use the APICID from CPUID -- with the expectation that it can be
   wrong.
 - (ab)use one of the high bits in cpuid_to_apicid[] as a test-and-set
   trylock. This avoids two CPUs from using the same per-cpu base, if
   CPUID is being malicious. Panic on fail.
 - validate against MSR the moment we can and panic if not matching

The trylock ensures the stacks/percpu state is not used by multiple
CPUs, and should guarantee a coherent state to get far enough along to
be able to do the #VE inducing RDMSR.
[PATCH] x86/apic: Fix use of X{,2}APIC_ENABLE in asm with older binutils
Posted by Andrew Cooper 11 months, 2 weeks ago
"x86/smpboot: Support parallel startup of secondary CPUs" adds the first use
of X2APIC_ENABLE in assembly, but older binutils don't tolerate the UL suffix.

Switch to using BIT() instead.

Fixes: 7e75178a0950 ("x86/smpboot: Support parallel startup of secondary CPUs")
Reported-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Tested-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
 arch/x86/include/asm/apicdef.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index bf546dfb6e58..4b125e5b3187 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_APICDEF_H
 #define _ASM_X86_APICDEF_H
 
+#include <linux/bits.h>
+
 /*
  * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
  *
@@ -140,8 +142,8 @@
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
 #define APIC_BASE_MSR		0x800
 #define APIC_X2APIC_ID_MSR	0x802
-#define XAPIC_ENABLE	(1UL << 11)
-#define X2APIC_ENABLE	(1UL << 10)
+#define XAPIC_ENABLE		BIT(11)
+#define X2APIC_ENABLE		BIT(10)
 
 #ifdef CONFIG_X86_32
 # define MAX_IO_APICS 64

base-commit: 0c7ffa32dbd6b09a87fea4ad1de8b27145dfd9a6
-- 
2.30.2
Re: [PATCH] x86/apic: Fix use of X{,2}APIC_ENABLE in asm with older binutils
Posted by Russell King (Oracle) 11 months, 2 weeks ago
Hi,

Please can you tell me what the relevance of this patch is to me, and
thus why I'm included in the Cc list? I have never touched this file,
not in its current path nor a previous path according to git.

Thanks.

On Mon, May 22, 2023 at 11:57:38AM +0100, Andrew Cooper wrote:
> "x86/smpboot: Support parallel startup of secondary CPUs" adds the first use
> of X2APIC_ENABLE in assembly, but older binutils don't tolerate the UL suffix.
> 
> Switch to using BIT() instead.
> 
> Fixes: 7e75178a0950 ("x86/smpboot: Support parallel startup of secondary CPUs")
> Reported-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
> Tested-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
>  arch/x86/include/asm/apicdef.h | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
> index bf546dfb6e58..4b125e5b3187 100644
> --- a/arch/x86/include/asm/apicdef.h
> +++ b/arch/x86/include/asm/apicdef.h
> @@ -2,6 +2,8 @@
>  #ifndef _ASM_X86_APICDEF_H
>  #define _ASM_X86_APICDEF_H
>  
> +#include <linux/bits.h>
> +
>  /*
>   * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
>   *
> @@ -140,8 +142,8 @@
>  #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
>  #define APIC_BASE_MSR		0x800
>  #define APIC_X2APIC_ID_MSR	0x802
> -#define XAPIC_ENABLE	(1UL << 11)
> -#define X2APIC_ENABLE	(1UL << 10)
> +#define XAPIC_ENABLE		BIT(11)
> +#define X2APIC_ENABLE		BIT(10)
>  
>  #ifdef CONFIG_X86_32
>  # define MAX_IO_APICS 64
> 
> base-commit: 0c7ffa32dbd6b09a87fea4ad1de8b27145dfd9a6
> -- 
> 2.30.2
> 
> 

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!