[PATCH v2] RISC-V: Implement kgdb_roundup_cpus() to enable future NMI Roundup

Jinjie Ruan posted 1 patch 1 month, 1 week ago
arch/riscv/kernel/smp.c | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)
[PATCH v2] RISC-V: Implement kgdb_roundup_cpus() to enable future NMI Roundup
Posted by Jinjie Ruan 1 month, 1 week ago
Until now, the generic weak kgdb_roundup_cpus() has been used for kgdb on
RISCV. A custom one allows to debug CPUs that are stuck with interrupts
disabled with NMI support in the future. And using an IPI is better than
the generic one since it avoids the potential situation described in the
generic kgdb_call_nmi_hook(). As Andrew pointed out, once there is NMI
support, we can easily extend this and the CPU backtrace support
to use NMIs.

After this patch, the kgdb test show that:
	# echo g > /proc/sysrq-trigger
	[2]kdb> btc
	btc: cpu status: Currently on cpu 2
	Available cpus: 0-1(-), 2, 3(-)
	Stack traceback for pid 0
	0xffffffff81c13a40        0        0  1    0   -  0xffffffff81c14510  swapper/0
	CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.10.0-g3120273055b6-dirty #51
	Hardware name: riscv-virtio,qemu (DT)
	Call Trace:
	[<ffffffff80006c48>] dump_backtrace+0x28/0x30
	[<ffffffff80fceb38>] show_stack+0x38/0x44
	[<ffffffff80fe6a04>] dump_stack_lvl+0x58/0x7a
	[<ffffffff80fe6a3e>] dump_stack+0x18/0x20
	[<ffffffff801143fa>] kgdb_cpu_enter+0x682/0x6b2
	[<ffffffff801144ca>] kgdb_nmicallback+0xa0/0xac
	[<ffffffff8000a392>] handle_IPI+0x9c/0x120
	[<ffffffff800a2baa>] handle_percpu_devid_irq+0xa4/0x1e4
	[<ffffffff8009cca8>] generic_handle_domain_irq+0x28/0x36
	[<ffffffff800a9e5c>] ipi_mux_process+0xe8/0x110
	[<ffffffff806e1e30>] imsic_handle_irq+0xf8/0x13a
	[<ffffffff8009cca8>] generic_handle_domain_irq+0x28/0x36
	[<ffffffff806dff12>] riscv_intc_aia_irq+0x2e/0x40
	[<ffffffff80fe6ab0>] handle_riscv_irq+0x54/0x86
	[<ffffffff80ff2e4a>] call_on_irq_stack+0x32/0x40

Rebased on Ryo Takakura's "RISC-V: Enable IPI CPU Backtrace" patch.

Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
---
v2:
- Update the commit title and message.
- Add Reviewed-by.
---
 arch/riscv/kernel/smp.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 9b047899791c..c180a647a30e 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -13,6 +13,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/kexec.h>
+#include <linux/kgdb.h>
 #include <linux/percpu.h>
 #include <linux/profile.h>
 #include <linux/smp.h>
@@ -35,6 +36,7 @@ enum ipi_message_type {
 	IPI_IRQ_WORK,
 	IPI_TIMER,
 	IPI_CPU_BACKTRACE,
+	IPI_KGDB_ROUNDUP,
 	IPI_MAX
 };
 
@@ -115,6 +117,7 @@ void arch_irq_work_raise(void)
 
 static irqreturn_t handle_IPI(int irq, void *data)
 {
+	unsigned int cpu = smp_processor_id();
 	int ipi = irq - ipi_virq_base;
 
 	switch (ipi) {
@@ -128,7 +131,7 @@ static irqreturn_t handle_IPI(int irq, void *data)
 		ipi_stop();
 		break;
 	case IPI_CPU_CRASH_STOP:
-		ipi_cpu_crash_stop(smp_processor_id(), get_irq_regs());
+		ipi_cpu_crash_stop(cpu, get_irq_regs());
 		break;
 	case IPI_IRQ_WORK:
 		irq_work_run();
@@ -141,8 +144,11 @@ static irqreturn_t handle_IPI(int irq, void *data)
 	case IPI_CPU_BACKTRACE:
 		nmi_cpu_backtrace(get_irq_regs());
 		break;
+	case IPI_KGDB_ROUNDUP:
+		kgdb_nmicallback(cpu, get_irq_regs());
+		break;
 	default:
-		pr_warn("CPU%d: unhandled IPI%d\n", smp_processor_id(), ipi);
+		pr_warn("CPU%d: unhandled IPI%d\n", cpu, ipi);
 		break;
 	}
 
@@ -209,6 +215,7 @@ static const char * const ipi_names[] = {
 	[IPI_IRQ_WORK]		= "IRQ work interrupts",
 	[IPI_TIMER]		= "Timer broadcast interrupts",
 	[IPI_CPU_BACKTRACE]     = "CPU backtrace interrupts",
+	[IPI_KGDB_ROUNDUP]	= "KGDB roundup interrupts",
 };
 
 void show_ipi_stats(struct seq_file *p, int prec)
@@ -339,3 +346,19 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
 {
 	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, riscv_backtrace_ipi);
 }
+
+#ifdef CONFIG_KGDB
+void kgdb_roundup_cpus(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		/* No need to roundup ourselves */
+		if (cpu == this_cpu)
+			continue;
+
+		send_ipi_single(cpu, IPI_KGDB_ROUNDUP);
+	}
+}
+#endif
-- 
2.34.1
Re: [PATCH v2] RISC-V: Implement kgdb_roundup_cpus() to enable future NMI Roundup
Posted by Jinjie Ruan 2 weeks, 4 days ago
Ping.

On 2024/7/27 14:34, Jinjie Ruan wrote:
> Until now, the generic weak kgdb_roundup_cpus() has been used for kgdb on
> RISCV. A custom one allows to debug CPUs that are stuck with interrupts
> disabled with NMI support in the future. And using an IPI is better than
> the generic one since it avoids the potential situation described in the
> generic kgdb_call_nmi_hook(). As Andrew pointed out, once there is NMI
> support, we can easily extend this and the CPU backtrace support
> to use NMIs.
> 
> After this patch, the kgdb test show that:
> 	# echo g > /proc/sysrq-trigger
> 	[2]kdb> btc
> 	btc: cpu status: Currently on cpu 2
> 	Available cpus: 0-1(-), 2, 3(-)
> 	Stack traceback for pid 0
> 	0xffffffff81c13a40        0        0  1    0   -  0xffffffff81c14510  swapper/0
> 	CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.10.0-g3120273055b6-dirty #51
> 	Hardware name: riscv-virtio,qemu (DT)
> 	Call Trace:
> 	[<ffffffff80006c48>] dump_backtrace+0x28/0x30
> 	[<ffffffff80fceb38>] show_stack+0x38/0x44
> 	[<ffffffff80fe6a04>] dump_stack_lvl+0x58/0x7a
> 	[<ffffffff80fe6a3e>] dump_stack+0x18/0x20
> 	[<ffffffff801143fa>] kgdb_cpu_enter+0x682/0x6b2
> 	[<ffffffff801144ca>] kgdb_nmicallback+0xa0/0xac
> 	[<ffffffff8000a392>] handle_IPI+0x9c/0x120
> 	[<ffffffff800a2baa>] handle_percpu_devid_irq+0xa4/0x1e4
> 	[<ffffffff8009cca8>] generic_handle_domain_irq+0x28/0x36
> 	[<ffffffff800a9e5c>] ipi_mux_process+0xe8/0x110
> 	[<ffffffff806e1e30>] imsic_handle_irq+0xf8/0x13a
> 	[<ffffffff8009cca8>] generic_handle_domain_irq+0x28/0x36
> 	[<ffffffff806dff12>] riscv_intc_aia_irq+0x2e/0x40
> 	[<ffffffff80fe6ab0>] handle_riscv_irq+0x54/0x86
> 	[<ffffffff80ff2e4a>] call_on_irq_stack+0x32/0x40
> 
> Rebased on Ryo Takakura's "RISC-V: Enable IPI CPU Backtrace" patch.
> 
> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> v2:
> - Update the commit title and message.
> - Add Reviewed-by.
> ---
>  arch/riscv/kernel/smp.c | 27 +++++++++++++++++++++++++--
>  1 file changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
> index 9b047899791c..c180a647a30e 100644
> --- a/arch/riscv/kernel/smp.c
> +++ b/arch/riscv/kernel/smp.c
> @@ -13,6 +13,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/module.h>
>  #include <linux/kexec.h>
> +#include <linux/kgdb.h>
>  #include <linux/percpu.h>
>  #include <linux/profile.h>
>  #include <linux/smp.h>
> @@ -35,6 +36,7 @@ enum ipi_message_type {
>  	IPI_IRQ_WORK,
>  	IPI_TIMER,
>  	IPI_CPU_BACKTRACE,
> +	IPI_KGDB_ROUNDUP,
>  	IPI_MAX
>  };
>  
> @@ -115,6 +117,7 @@ void arch_irq_work_raise(void)
>  
>  static irqreturn_t handle_IPI(int irq, void *data)
>  {
> +	unsigned int cpu = smp_processor_id();
>  	int ipi = irq - ipi_virq_base;
>  
>  	switch (ipi) {
> @@ -128,7 +131,7 @@ static irqreturn_t handle_IPI(int irq, void *data)
>  		ipi_stop();
>  		break;
>  	case IPI_CPU_CRASH_STOP:
> -		ipi_cpu_crash_stop(smp_processor_id(), get_irq_regs());
> +		ipi_cpu_crash_stop(cpu, get_irq_regs());
>  		break;
>  	case IPI_IRQ_WORK:
>  		irq_work_run();
> @@ -141,8 +144,11 @@ static irqreturn_t handle_IPI(int irq, void *data)
>  	case IPI_CPU_BACKTRACE:
>  		nmi_cpu_backtrace(get_irq_regs());
>  		break;
> +	case IPI_KGDB_ROUNDUP:
> +		kgdb_nmicallback(cpu, get_irq_regs());
> +		break;
>  	default:
> -		pr_warn("CPU%d: unhandled IPI%d\n", smp_processor_id(), ipi);
> +		pr_warn("CPU%d: unhandled IPI%d\n", cpu, ipi);
>  		break;
>  	}
>  
> @@ -209,6 +215,7 @@ static const char * const ipi_names[] = {
>  	[IPI_IRQ_WORK]		= "IRQ work interrupts",
>  	[IPI_TIMER]		= "Timer broadcast interrupts",
>  	[IPI_CPU_BACKTRACE]     = "CPU backtrace interrupts",
> +	[IPI_KGDB_ROUNDUP]	= "KGDB roundup interrupts",
>  };
>  
>  void show_ipi_stats(struct seq_file *p, int prec)
> @@ -339,3 +346,19 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
>  	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, riscv_backtrace_ipi);
>  }
> +
> +#ifdef CONFIG_KGDB
> +void kgdb_roundup_cpus(void)
> +{
> +	int this_cpu = raw_smp_processor_id();
> +	int cpu;
> +
> +	for_each_online_cpu(cpu) {
> +		/* No need to roundup ourselves */
> +		if (cpu == this_cpu)
> +			continue;
> +
> +		send_ipi_single(cpu, IPI_KGDB_ROUNDUP);
> +	}
> +}
> +#endif