[v2] Add NMI Support to RISC-V via SSE

[PATCH v2 5/8] riscv: smp: retry CPU stop with NMI if IPI fails

Posted by Yunhui Cui 2 months, 3 weeks ago

Retry the CPU stop with an NMI when the stop IPI fails and the RISC-V SSE
NMI is supported. The implementation is borrowed from arm64.

Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
 arch/riscv/include/asm/smp.h           |  2 ++
 arch/riscv/kernel/smp.c                | 23 +++++++++++++++++++----
 drivers/firmware/riscv/riscv_sse_nmi.c |  1 +
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index f53f1f0e7aa9e..e01ea962adfc4 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -63,6 +63,8 @@ static inline void cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
 }
 #endif
 
+void cpu_stop(void);
+
 /* Secondary hart entry */
 asmlinkage void smp_callin(void);
 
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 07ccc28f52172..aa1cfc344a2c6 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -69,7 +69,7 @@ int riscv_hartid_to_cpuid(unsigned long hartid)
 	return -ENOENT;
 }
 
-static void ipi_stop(void)
+void cpu_stop(void)
 {
 	set_cpu_online(smp_processor_id(), false);
 	while (1)
@@ -127,7 +127,7 @@ static irqreturn_t handle_IPI(int irq, void *data)
 		generic_smp_call_function_interrupt();
 		break;
 	case IPI_CPU_STOP:
-		ipi_stop();
+		cpu_stop();
 		break;
 	case IPI_CPU_CRASH_STOP:
 		cpu_crash_stop(cpu, get_irq_regs());
@@ -250,10 +250,9 @@ void tick_broadcast(const struct cpumask *mask)
 void smp_send_stop(void)
 {
 	unsigned long timeout;
+	cpumask_t mask;
 
 	if (num_online_cpus() > 1) {
-		cpumask_t mask;
-
 		cpumask_copy(&mask, cpu_online_mask);
 		cpumask_clear_cpu(smp_processor_id(), &mask);
 
@@ -267,6 +266,22 @@ void smp_send_stop(void)
 	while (num_online_cpus() > 1 && timeout--)
 		udelay(1);
 
+	/*
+	 * If CPUs are still online, try an NMI. There's no excuse for this to
+	 * be slow, so we only give them an extra 10 ms to respond.
+	 */
+	if (num_other_online_cpus()) {
+		smp_rmb();
+		cpumask_copy(&mask, cpu_online_mask);
+		cpumask_clear_cpu(smp_processor_id(), &mask);
+		pr_info("SMP: retry stop with NMI for CPUs %*pbl\n",
+			cpumask_pr_args(&mask));
+		send_nmi_mask(&mask, LOCAL_NMI_STOP);
+		timeout = USEC_PER_MSEC * 10;
+		while (num_other_online_cpus() && timeout--)
+			udelay(1);
+	}
+
 	if (num_online_cpus() > 1)
 		pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
 			   cpumask_pr_args(cpu_online_mask));
diff --git a/drivers/firmware/riscv/riscv_sse_nmi.c b/drivers/firmware/riscv/riscv_sse_nmi.c
index e4c20dce40f9a..0ff0bda53608a 100644
--- a/drivers/firmware/riscv/riscv_sse_nmi.c
+++ b/drivers/firmware/riscv/riscv_sse_nmi.c
@@ -55,6 +55,7 @@ static int local_nmi_handler(u32 evt, void *arg, struct pt_regs *regs)
 	unsigned int cpu = smp_processor_id();
 
 	NMI_HANDLE(LOCAL_NMI_CRASH, cpu_crash_stop, cpu, regs);
+	NMI_HANDLE(LOCAL_NMI_STOP, cpu_stop);
 
 	atomic_set(&local_nmi_arg, LOCAL_NMI_NONE);
 
-- 
2.39.5

[PATCH v2 1/8] drivers: firmware: riscv: add SSE NMI support
[PATCH v2 2/8] riscv: smp: move ipi_cpu_crash_stop() declaration to smp.h
[PATCH v2 3/8] smp: move num_other_online_cpus() into smp.h
[PATCH v2 4/8] riscv: smp: use NMI for crash stop
[PATCH v2 5/8] riscv: smp: retry CPU stop with NMI if IPI fails
[PATCH v2 6/8] riscv: smp: use NMI for backtrace
[PATCH v2 7/8] riscv: smp: kgdb: Use NMI for CPU roundup
[PATCH v2 8/8] drivers: firmware: riscv: add unknown nmi support