[PATCH 2/3] LoongArch: Add kdump support

Youling Tang posted 3 patches 3 years, 7 months ago
There is a newer version of this series
[PATCH 2/3] LoongArch: Add kdump support
Posted by Youling Tang 3 years, 7 months ago
This patch adds support for kdump: the kernel will reserve a region
for the crash kernel and jump there on panic.

Arch-specific functions are added to allow for implementing a crash
dump file interface, /proc/vmcore, which can be viewed as an ELF file.

A user space tool, like kexec-tools, is responsible for allocating a
separate region for the core's ELF header within crash kdump kernel
memory and filling it in when executing kexec_load().

Then, its location will be advertised to the crash dump kernel via a new
device-tree property, "linux,elfcorehdr", and the crash dump kernel preserves
the region for later use with fdt_reserve_elfcorehdr() at boot time.

At the same time, the crash dump kernel will also be limited to the
crashkernel area via a new device-tree property, "linux,usable-memory-range",
so as not to destroy the original kernel's dump data.

On crash dump kernel, /proc/vmcore will access the primary kernel's memory
with copy_oldmem_page().

I tested this on a LoongArch 3A5000 machine and it works as expected (the
suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test it
by triggering a crash through /proc/sysrq-trigger:

 $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
 # echo c > /proc/sysrq-trigger

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
---
 arch/loongarch/Kconfig                  |  22 ++++++
 arch/loongarch/Makefile                 |   4 +
 arch/loongarch/kernel/Makefile          |   3 +-
 arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
 arch/loongarch/kernel/crash_dump.c      |  19 +++++
 arch/loongarch/kernel/machine_kexec.c   |  12 ++-
 arch/loongarch/kernel/mem.c             |   6 ++
 arch/loongarch/kernel/relocate_kernel.S |   6 ++
 arch/loongarch/kernel/setup.c           |  49 ++++++++++++
 arch/loongarch/kernel/traps.c           |   4 +
 10 files changed, 217 insertions(+), 8 deletions(-)
 create mode 100644 arch/loongarch/kernel/crash.c
 create mode 100644 arch/loongarch/kernel/crash_dump.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 903c82fa958d..7c1b07a5b5bd 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -420,6 +420,28 @@ config KEXEC
 
 	  The name comes from the similarity to the exec system call.
 
+config CRASH_DUMP
+	bool "Build kdump crash kernel"
+	help
+	  Generate crash dump after being started by kexec. This should
+	  be normally only set in special crash dump kernels which are
+	  loaded in the main kernel with kexec-tools into a specially
+	  reserved region and then later executed after a crash by
+	  kdump/kexec.
+
+	  For more details see Documentation/admin-guide/kdump/kdump.rst
+
+config PHYSICAL_START
+	hex "Physical address where the kernel is loaded"
+	default "0x9000000091000000" if 64BIT
+	depends on CRASH_DUMP
+	help
+	  This gives the XKPRANGE address where the kernel is loaded.
+	  If you plan to use kernel for capturing the crash dump change
+	  this value to start of the reserved region (the "X" value as
+	  specified in the "crashkernel=YM@XM" command line boot parameter
+	  passed to the panic-ed kernel).
+
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 4bc47f47cfd8..7dabd580426d 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE		+= -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
 cflags-y += -ffreestanding
 cflags-y += $(call cc-option, -mno-check-zero-division)
 
+ifdef CONFIG_PHYSICAL_START
+load-y		= $(CONFIG_PHYSICAL_START)
+else
 load-y		= 0x9000000000200000
+endif
 bootvars-y	= VMLINUX_LOAD_ADDRESS=$(load-y)
 
 drivers-$(CONFIG_PCI)		+= arch/loongarch/pci/
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 20b64ac3f128..df5aea129364 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
 obj-$(CONFIG_MODULES)		+= module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
-obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
+obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
 
 obj-$(CONFIG_PROC_FS)		+= proc.o
 
diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
new file mode 100644
index 000000000000..b4f249ec6301
--- /dev/null
+++ b/arch/loongarch/kernel/crash.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS
+ */
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <asm/cacheflush.h>
+#include <asm/kexec.h>
+
+static cpumask_t cpus_in_crash = CPU_MASK_NONE;
+
+#ifdef CONFIG_SMP
+static void crash_shutdown_secondary(void *passed_regs)
+{
+	struct pt_regs *regs = passed_regs;
+	int cpu = smp_processor_id();
+
+	/*
+	 * If we are passed registers, use those.  Otherwise get the
+	 * regs from the last interrupt, which should be correct, as
+	 * we are in an interrupt.  But if the regs are not there,
+	 * pull them from the top of the stack.  They are probably
+	 * wrong, but we need something to keep from crashing again.
+	 */
+	if (!regs)
+		regs = get_irq_regs();
+	if (!regs)
+		regs = task_pt_regs(current);
+
+	local_irq_disable();
+	if (!cpumask_test_cpu(cpu, &cpus_in_crash))
+		crash_save_cpu(regs, cpu);
+	cpumask_set_cpu(cpu, &cpus_in_crash);
+
+	while (!atomic_read(&kexec_ready_to_reboot))
+		cpu_relax();
+
+	kexec_reboot();
+}
+
+/* Override the weak function in kernel/panic.c */
+void crash_smp_send_stop(void)
+{
+	static int cpus_stopped;
+	unsigned long timeout;
+	unsigned int ncpus;
+
+	/*
+	 * This function can be called twice in panic path, but obviously
+	 * we execute this only once.
+	 */
+	if (cpus_stopped)
+		return;
+
+	cpus_stopped = 1;
+
+	 /* Excluding the panic cpu */
+	ncpus = num_online_cpus() - 1;
+
+	smp_call_function(crash_shutdown_secondary, NULL, 0);
+	smp_wmb();
+
+	/*
+	 * The crash CPU sends an IPI and wait for other CPUs to
+	 * respond. Delay of at least 10 seconds.
+	 */
+	pr_emerg("Sending IPI to other cpus...\n");
+	timeout = USEC_PER_SEC * 10;
+	while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
+		cpu_relax();
+		udelay(1);
+	}
+}
+
+#endif
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	int crashing_cpu;
+
+	local_irq_disable();
+
+	crashing_cpu = smp_processor_id();
+	crash_save_cpu(regs, crashing_cpu);
+
+	/* shutdown non-crashing cpus */
+	crash_smp_send_stop();
+	cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
+
+	pr_info("Starting crashdump kernel...\n");
+}
diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
new file mode 100644
index 000000000000..13e5d2f7870d
--- /dev/null
+++ b/arch/loongarch/kernel/crash_dump.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/highmem.h>
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+			 size_t csize, unsigned long offset)
+{
+	void  *vaddr;
+
+	if (!csize)
+		return 0;
+
+	vaddr = kmap_local_pfn(pfn);
+	csize = copy_to_iter(vaddr + offset, csize, iter);
+	kunmap_local(vaddr);
+
+	return csize;
+}
diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
index 4ffcd4cd9c8c..f793a3ff09a3 100644
--- a/arch/loongarch/kernel/machine_kexec.c
+++ b/arch/loongarch/kernel/machine_kexec.c
@@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
 		continue;
 	}
 
-	/* kexec need a safe page to save reboot_code_buffer */
+	/* kexec/kdump need a safe page to save reboot_code_buffer */
 	kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
 
 	reboot_code_buffer =
@@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
 
 	kexec_reboot();
 }
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
-}
 #endif
 
 void machine_shutdown(void)
@@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
 
 	jump_addr = (unsigned long)phys_to_virt(image->start);
 
-	first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
 
 	/*
 	 * The generic kexec code builds a page list with physical
@@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
 
 	/*
 	 * We know we were online, and there will be no incoming IPIs at
-	 * this point.
+	 * this point. Mark online again before rebooting so that the crash
+	 * analysis tool will see us correctly.
 	 */
 	set_cpu_online(smp_processor_id(), true);
 
diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
index 7423361b0ebc..c6def6ff81c8 100644
--- a/arch/loongarch/kernel/mem.c
+++ b/arch/loongarch/kernel/mem.c
@@ -5,6 +5,7 @@
 #include <linux/efi.h>
 #include <linux/initrd.h>
 #include <linux/memblock.h>
+#include <linux/of_fdt.h>
 
 #include <asm/bootinfo.h>
 #include <asm/loongson.h>
@@ -61,4 +62,9 @@ void __init memblock_init(void)
 
 	/* Reserve the initrd */
 	reserve_initrd_mem();
+
+	/* Mainly reserved memory for the elf core head */
+	early_init_fdt_scan_reserved_mem();
+	/* Parse linux,usable-memory-range is for crash dump kernel */
+	early_init_dt_check_for_usable_mem_range();
 }
diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
index d1f242f74ea8..4ee5ac4ac2d7 100644
--- a/arch/loongarch/kernel/relocate_kernel.S
+++ b/arch/loongarch/kernel/relocate_kernel.S
@@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
 	move		s2, a2
 	move		s3, a3
 
+	/*
+	 * In case of a kdump/crash kernel, the indirection page is not
+	 * populated as the kernel is directly copied to a reserved location
+	 */
+	beqz		s2, done
+
 process_entry:
 	PTR_L		s4, s2, 0
 	PTR_ADDI	s2, s2, SZREG
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index f938aae3e92c..ea34b77e402f 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -19,6 +19,8 @@
 #include <linux/memblock.h>
 #include <linux/initrd.h>
 #include <linux/ioport.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/console.h>
 #include <linux/pfn.h>
@@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
 }
 early_param("mem", early_parse_mem);
 
+static void __init loongarch_parse_crashkernel(void)
+{
+#ifdef CONFIG_KEXEC
+	unsigned long long start;
+	unsigned long long total_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	total_mem = memblock_phys_mem_size();
+	ret = parse_crashkernel(boot_command_line, total_mem,
+				&crash_size, &crash_base);
+	if (ret != 0 || crash_size <= 0)
+		return;
+
+
+	start = memblock_phys_alloc_range(crash_size, 1, crash_base,
+					crash_base + crash_size);
+	if (start != crash_base) {
+		pr_warn("Invalid memory region reserved for crash kernel\n");
+		return;
+	}
+
+	crashk_res.start = crash_base;
+	crashk_res.end	 = crash_base + crash_size - 1;
+#endif
+}
+
+static void __init request_crashkernel(struct resource *res)
+{
+#ifdef CONFIG_KEXEC
+	int ret;
+
+	if (crashk_res.start == crashk_res.end)
+		return;
+
+	ret = request_resource(res, &crashk_res);
+	if (!ret)
+		pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
+			(unsigned long)((crashk_res.end -
+					 crashk_res.start + 1) >> 20),
+			(unsigned long)(crashk_res.start  >> 20));
+#endif
+}
+
 void __init platform_init(void)
 {
 	efi_init();
@@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
 
 	check_kernel_sections_mem();
 
+	loongarch_parse_crashkernel();
+
 	/*
 	 * In order to reduce the possibility of kernel panic when failed to
 	 * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
@@ -290,6 +338,7 @@ static void __init resource_init(void)
 		request_resource(res, &code_resource);
 		request_resource(res, &data_resource);
 		request_resource(res, &bss_resource);
+		request_crashkernel(res);
 	}
 }
 
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index aa1c95aaf595..0e610872f3f4 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -10,6 +10,7 @@
 #include <linux/entry-common.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kexec.h>
 #include <linux/module.h>
 #include <linux/extable.h>
 #include <linux/mm.h>
@@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 
 	oops_exit();
 
+	if (regs && kexec_should_crash(current))
+		crash_kexec(regs);
+
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
 
-- 
2.36.0
Re: [PATCH 2/3] LoongArch: Add kdump support
Posted by Huacai Chen 3 years, 7 months ago
Hi, Youling,

I think crash.c can be merged into crash_dump.c

Huacai

On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>
> This patch adds support for kdump, the kernel will reserve a region
> for the crash kernel and jump there on panic.
>
> Arch-specific functions are added to allow for implementing a crash
> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
>
> A user space tool, like kexec-tools, is responsible for allocating a
> separate region for the core's ELF header within crash kdump kernel
> memory and filling it in when executing kexec_load().
>
> Then, its location will be advertised to crash dump kernel via a new
> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>
> At the same time, it will also limit the crash kdump kernel to the
> crashkernel area via a new device-tree property, "linux, usable-memory-range",
> so as not to destroy the original kernel dump data.
>
> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> with copy_oldmem_page().
>
> I tested this on  LoongArch 3A5000 machine and works as expected (Suggest
> crashkernel parameter is "crashkernel=512M@2320M"), you may test it by
> triggering a crash through /proc/sysrq_trigger:
>
>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>  # echo c > /proc/sysrq_trigger
>
> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> ---
>  arch/loongarch/Kconfig                  |  22 ++++++
>  arch/loongarch/Makefile                 |   4 +
>  arch/loongarch/kernel/Makefile          |   3 +-
>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>  arch/loongarch/kernel/mem.c             |   6 ++
>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>  arch/loongarch/kernel/traps.c           |   4 +
>  10 files changed, 217 insertions(+), 8 deletions(-)
>  create mode 100644 arch/loongarch/kernel/crash.c
>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 903c82fa958d..7c1b07a5b5bd 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -420,6 +420,28 @@ config KEXEC
>
>           The name comes from the similarity to the exec system call.
>
> +config CRASH_DUMP
> +       bool "Build kdump crash kernel"
> +       help
> +         Generate crash dump after being started by kexec. This should
> +         be normally only set in special crash dump kernels which are
> +         loaded in the main kernel with kexec-tools into a specially
> +         reserved region and then later executed after a crash by
> +         kdump/kexec.
> +
> +         For more details see Documentation/admin-guide/kdump/kdump.rst
> +
> +config PHYSICAL_START
> +       hex "Physical address where the kernel is loaded"
> +       default "0x9000000091000000" if 64BIT
> +       depends on CRASH_DUMP
> +       help
> +         This gives the XKPRANGE address where the kernel is loaded.
> +         If you plan to use kernel for capturing the crash dump change
> +         this value to start of the reserved region (the "X" value as
> +         specified in the "crashkernel=YM@XM" command line boot parameter
> +         passed to the panic-ed kernel).
> +
>  config SECCOMP
>         bool "Enable seccomp to safely compute untrusted bytecode"
>         depends on PROC_FS
> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> index 4bc47f47cfd8..7dabd580426d 100644
> --- a/arch/loongarch/Makefile
> +++ b/arch/loongarch/Makefile
> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>  cflags-y += -ffreestanding
>  cflags-y += $(call cc-option, -mno-check-zero-division)
>
> +ifdef CONFIG_PHYSICAL_START
> +load-y         = $(CONFIG_PHYSICAL_START)
> +else
>  load-y         = 0x9000000000200000
> +endif
>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>
>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index 20b64ac3f128..df5aea129364 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>
> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>
>  obj-$(CONFIG_PROC_FS)          += proc.o
>
> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> new file mode 100644
> index 000000000000..b4f249ec6301
> --- /dev/null
> +++ b/arch/loongarch/kernel/crash.c
> @@ -0,0 +1,100 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + *
> + * Derived from MIPS
> + */
> +#include <linux/kernel.h>
> +#include <linux/smp.h>
> +#include <linux/reboot.h>
> +#include <linux/crash_dump.h>
> +#include <linux/delay.h>
> +#include <linux/irq.h>
> +#include <linux/types.h>
> +#include <linux/sched.h>
> +#include <linux/sched/task_stack.h>
> +#include <asm/cacheflush.h>
> +#include <asm/kexec.h>
> +
> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> +
> +#ifdef CONFIG_SMP
> +static void crash_shutdown_secondary(void *passed_regs)
> +{
> +       struct pt_regs *regs = passed_regs;
> +       int cpu = smp_processor_id();
> +
> +       /*
> +        * If we are passed registers, use those.  Otherwise get the
> +        * regs from the last interrupt, which should be correct, as
> +        * we are in an interrupt.  But if the regs are not there,
> +        * pull them from the top of the stack.  They are probably
> +        * wrong, but we need something to keep from crashing again.
> +        */
> +       if (!regs)
> +               regs = get_irq_regs();
> +       if (!regs)
> +               regs = task_pt_regs(current);
> +
> +       local_irq_disable();
> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> +               crash_save_cpu(regs, cpu);
> +       cpumask_set_cpu(cpu, &cpus_in_crash);
> +
> +       while (!atomic_read(&kexec_ready_to_reboot))
> +               cpu_relax();
> +
> +       kexec_reboot();
> +}
> +
> +/* Override the weak function in kernel/panic.c */
> +void crash_smp_send_stop(void)
> +{
> +       static int cpus_stopped;
> +       unsigned long timeout;
> +       unsigned int ncpus;
> +
> +       /*
> +        * This function can be called twice in panic path, but obviously
> +        * we execute this only once.
> +        */
> +       if (cpus_stopped)
> +               return;
> +
> +       cpus_stopped = 1;
> +
> +        /* Excluding the panic cpu */
> +       ncpus = num_online_cpus() - 1;
> +
> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
> +       smp_wmb();
> +
> +       /*
> +        * The crash CPU sends an IPI and wait for other CPUs to
> +        * respond. Delay of at least 10 seconds.
> +        */
> +       pr_emerg("Sending IPI to other cpus...\n");
> +       timeout = USEC_PER_SEC * 10;
> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> +               cpu_relax();
> +               udelay(1);
> +       }
> +}
> +
> +#endif
> +
> +void machine_crash_shutdown(struct pt_regs *regs)
> +{
> +       int crashing_cpu;
> +
> +       local_irq_disable();
> +
> +       crashing_cpu = smp_processor_id();
> +       crash_save_cpu(regs, crashing_cpu);
> +
> +       /* shutdown non-crashing cpus */
> +       crash_smp_send_stop();
> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> +
> +       pr_info("Starting crashdump kernel...\n");
> +}
> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> new file mode 100644
> index 000000000000..13e5d2f7870d
> --- /dev/null
> +++ b/arch/loongarch/kernel/crash_dump.c
> @@ -0,0 +1,19 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/highmem.h>
> +#include <linux/crash_dump.h>
> +#include <linux/io.h>
> +
> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> +                        size_t csize, unsigned long offset)
> +{
> +       void  *vaddr;
> +
> +       if (!csize)
> +               return 0;
> +
> +       vaddr = kmap_local_pfn(pfn);
> +       csize = copy_to_iter(vaddr + offset, csize, iter);
> +       kunmap_local(vaddr);
> +
> +       return csize;
> +}
> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> --- a/arch/loongarch/kernel/machine_kexec.c
> +++ b/arch/loongarch/kernel/machine_kexec.c
> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>                 continue;
>         }
>
> -       /* kexec need a safe page to save reboot_code_buffer */
> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>
>         reboot_code_buffer =
> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>
>         kexec_reboot();
>  }
> -
> -void machine_crash_shutdown(struct pt_regs *regs)
> -{
> -}
>  #endif
>
>  void machine_shutdown(void)
> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>
>         jump_addr = (unsigned long)phys_to_virt(image->start);
>
> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> +       if (image->type == KEXEC_TYPE_DEFAULT)
> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>
>         /*
>          * The generic kexec code builds a page list with physical
> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>
>         /*
>          * We know we were online, and there will be no incoming IPIs at
> -        * this point.
> +        * this point. Mark online again before rebooting so that the crash
> +        * analysis tool will see us correctly.
>          */
>         set_cpu_online(smp_processor_id(), true);
>
> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> index 7423361b0ebc..c6def6ff81c8 100644
> --- a/arch/loongarch/kernel/mem.c
> +++ b/arch/loongarch/kernel/mem.c
> @@ -5,6 +5,7 @@
>  #include <linux/efi.h>
>  #include <linux/initrd.h>
>  #include <linux/memblock.h>
> +#include <linux/of_fdt.h>
>
>  #include <asm/bootinfo.h>
>  #include <asm/loongson.h>
> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>
>         /* Reserve the initrd */
>         reserve_initrd_mem();
> +
> +       /* Mainly reserved memory for the elf core head */
> +       early_init_fdt_scan_reserved_mem();
> +       /* Parse linux,usable-memory-range is for crash dump kernel */
> +       early_init_dt_check_for_usable_mem_range();
>  }
> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> index d1f242f74ea8..4ee5ac4ac2d7 100644
> --- a/arch/loongarch/kernel/relocate_kernel.S
> +++ b/arch/loongarch/kernel/relocate_kernel.S
> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>         move            s2, a2
>         move            s3, a3
>
> +       /*
> +        * In case of a kdump/crash kernel, the indirection page is not
> +        * populated as the kernel is directly copied to a reserved location
> +        */
> +       beqz            s2, done
> +
>  process_entry:
>         PTR_L           s4, s2, 0
>         PTR_ADDI        s2, s2, SZREG
> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> index f938aae3e92c..ea34b77e402f 100644
> --- a/arch/loongarch/kernel/setup.c
> +++ b/arch/loongarch/kernel/setup.c
> @@ -19,6 +19,8 @@
>  #include <linux/memblock.h>
>  #include <linux/initrd.h>
>  #include <linux/ioport.h>
> +#include <linux/kexec.h>
> +#include <linux/crash_dump.h>
>  #include <linux/root_dev.h>
>  #include <linux/console.h>
>  #include <linux/pfn.h>
> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>  }
>  early_param("mem", early_parse_mem);
>
> +static void __init loongarch_parse_crashkernel(void)
> +{
> +#ifdef CONFIG_KEXEC
> +       unsigned long long start;
> +       unsigned long long total_mem;
> +       unsigned long long crash_size, crash_base;
> +       int ret;
> +
> +       total_mem = memblock_phys_mem_size();
> +       ret = parse_crashkernel(boot_command_line, total_mem,
> +                               &crash_size, &crash_base);
> +       if (ret != 0 || crash_size <= 0)
> +               return;
> +
> +
> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> +                                       crash_base + crash_size);
> +       if (start != crash_base) {
> +               pr_warn("Invalid memory region reserved for crash kernel\n");
> +               return;
> +       }
> +
> +       crashk_res.start = crash_base;
> +       crashk_res.end   = crash_base + crash_size - 1;
> +#endif
> +}
> +
> +static void __init request_crashkernel(struct resource *res)
> +{
> +#ifdef CONFIG_KEXEC
> +       int ret;
> +
> +       if (crashk_res.start == crashk_res.end)
> +               return;
> +
> +       ret = request_resource(res, &crashk_res);
> +       if (!ret)
> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> +                       (unsigned long)((crashk_res.end -
> +                                        crashk_res.start + 1) >> 20),
> +                       (unsigned long)(crashk_res.start  >> 20));
> +#endif
> +}
> +
>  void __init platform_init(void)
>  {
>         efi_init();
> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>
>         check_kernel_sections_mem();
>
> +       loongarch_parse_crashkernel();
> +
>         /*
>          * In order to reduce the possibility of kernel panic when failed to
>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>                 request_resource(res, &code_resource);
>                 request_resource(res, &data_resource);
>                 request_resource(res, &bss_resource);
> +               request_crashkernel(res);
>         }
>  }
>
> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> index aa1c95aaf595..0e610872f3f4 100644
> --- a/arch/loongarch/kernel/traps.c
> +++ b/arch/loongarch/kernel/traps.c
> @@ -10,6 +10,7 @@
>  #include <linux/entry-common.h>
>  #include <linux/init.h>
>  #include <linux/kernel.h>
> +#include <linux/kexec.h>
>  #include <linux/module.h>
>  #include <linux/extable.h>
>  #include <linux/mm.h>
> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>
>         oops_exit();
>
> +       if (regs && kexec_should_crash(current))
> +               crash_kexec(regs);
> +
>         if (in_interrupt())
>                 panic("Fatal exception in interrupt");
>
> --
> 2.36.0
>
Re: [PATCH 2/3] LoongArch: Add kdump support
Posted by Youling Tang 3 years, 7 months ago
Hi, Huacai

On 09/04/2022 08:21 PM, Huacai Chen wrote:
> Hi, Youling,
>
> I think crash.c can be merged into crash_dump.c

Most architectures only implement copy_oldmem_page() in crash_dump.c, so
I'm not sure whether merging crash.c into crash_dump.c would break that
consistency.

Thanks,
Youling

>
> Huacai
>
> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> This patch adds support for kdump, the kernel will reserve a region
>> for the crash kernel and jump there on panic.
>>
>> Arch-specific functions are added to allow for implementing a crash
>> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
>>
>> A user space tool, like kexec-tools, is responsible for allocating a
>> separate region for the core's ELF header within crash kdump kernel
>> memory and filling it in when executing kexec_load().
>>
>> Then, its location will be advertised to crash dump kernel via a new
>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>
>> At the same time, it will also limit the crash kdump kernel to the
>> crashkernel area via a new device-tree property, "linux, usable-memory-range",
>> so as not to destroy the original kernel dump data.
>>
>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>> with copy_oldmem_page().
>>
>> I tested this on  LoongArch 3A5000 machine and works as expected (Suggest
>> crashkernel parameter is "crashkernel=512M@2320M"), you may test it by
>> triggering a crash through /proc/sysrq_trigger:
>>
>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>  # echo c > /proc/sysrq_trigger
>>
>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>> ---
>>  arch/loongarch/Kconfig                  |  22 ++++++
>>  arch/loongarch/Makefile                 |   4 +
>>  arch/loongarch/kernel/Makefile          |   3 +-
>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>>  arch/loongarch/kernel/mem.c             |   6 ++
>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>>  arch/loongarch/kernel/traps.c           |   4 +
>>  10 files changed, 217 insertions(+), 8 deletions(-)
>>  create mode 100644 arch/loongarch/kernel/crash.c
>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>
>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>> index 903c82fa958d..7c1b07a5b5bd 100644
>> --- a/arch/loongarch/Kconfig
>> +++ b/arch/loongarch/Kconfig
>> @@ -420,6 +420,28 @@ config KEXEC
>>
>>           The name comes from the similarity to the exec system call.
>>
>> +config CRASH_DUMP
>> +       bool "Build kdump crash kernel"
>> +       help
>> +         Generate crash dump after being started by kexec. This should
>> +         be normally only set in special crash dump kernels which are
>> +         loaded in the main kernel with kexec-tools into a specially
>> +         reserved region and then later executed after a crash by
>> +         kdump/kexec.
>> +
>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>> +
>> +config PHYSICAL_START
>> +       hex "Physical address where the kernel is loaded"
>> +       default "0x9000000091000000" if 64BIT
>> +       depends on CRASH_DUMP
>> +       help
>> +         This gives the XKPRANGE address where the kernel is loaded.
>> +         If you plan to use kernel for capturing the crash dump change
>> +         this value to start of the reserved region (the "X" value as
>> +         specified in the "crashkernel=YM@XM" command line boot parameter
>> +         passed to the panic-ed kernel).
>> +
>>  config SECCOMP
>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>         depends on PROC_FS
>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>> index 4bc47f47cfd8..7dabd580426d 100644
>> --- a/arch/loongarch/Makefile
>> +++ b/arch/loongarch/Makefile
>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>  cflags-y += -ffreestanding
>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>
>> +ifdef CONFIG_PHYSICAL_START
>> +load-y         = $(CONFIG_PHYSICAL_START)
>> +else
>>  load-y         = 0x9000000000200000
>> +endif
>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>
>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>> index 20b64ac3f128..df5aea129364 100644
>> --- a/arch/loongarch/kernel/Makefile
>> +++ b/arch/loongarch/kernel/Makefile
>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>
>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>
>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>
>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>> new file mode 100644
>> index 000000000000..b4f249ec6301
>> --- /dev/null
>> +++ b/arch/loongarch/kernel/crash.c
>> @@ -0,0 +1,100 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>> + *
>> + * Derived from MIPS
>> + */
>> +#include <linux/kernel.h>
>> +#include <linux/smp.h>
>> +#include <linux/reboot.h>
>> +#include <linux/crash_dump.h>
>> +#include <linux/delay.h>
>> +#include <linux/irq.h>
>> +#include <linux/types.h>
>> +#include <linux/sched.h>
>> +#include <linux/sched/task_stack.h>
>> +#include <asm/cacheflush.h>
>> +#include <asm/kexec.h>
>> +
>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>> +
>> +#ifdef CONFIG_SMP
>> +static void crash_shutdown_secondary(void *passed_regs)
>> +{
>> +       struct pt_regs *regs = passed_regs;
>> +       int cpu = smp_processor_id();
>> +
>> +       /*
>> +        * If we are passed registers, use those.  Otherwise get the
>> +        * regs from the last interrupt, which should be correct, as
>> +        * we are in an interrupt.  But if the regs are not there,
>> +        * pull them from the top of the stack.  They are probably
>> +        * wrong, but we need something to keep from crashing again.
>> +        */
>> +       if (!regs)
>> +               regs = get_irq_regs();
>> +       if (!regs)
>> +               regs = task_pt_regs(current);
>> +
>> +       local_irq_disable();
>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>> +               crash_save_cpu(regs, cpu);
>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>> +
>> +       while (!atomic_read(&kexec_ready_to_reboot))
>> +               cpu_relax();
>> +
>> +       kexec_reboot();
>> +}
>> +
>> +/* Override the weak function in kernel/panic.c */
>> +void crash_smp_send_stop(void)
>> +{
>> +       static int cpus_stopped;
>> +       unsigned long timeout;
>> +       unsigned int ncpus;
>> +
>> +       /*
>> +        * This function can be called twice in panic path, but obviously
>> +        * we execute this only once.
>> +        */
>> +       if (cpus_stopped)
>> +               return;
>> +
>> +       cpus_stopped = 1;
>> +
>> +        /* Excluding the panic cpu */
>> +       ncpus = num_online_cpus() - 1;
>> +
>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>> +       smp_wmb();
>> +
>> +       /*
>> +        * The crash CPU sends an IPI and wait for other CPUs to
>> +        * respond. Delay of at least 10 seconds.
>> +        */
>> +       pr_emerg("Sending IPI to other cpus...\n");
>> +       timeout = USEC_PER_SEC * 10;
>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>> +               cpu_relax();
>> +               udelay(1);
>> +       }
>> +}
>> +
>> +#endif
>> +
>> +void machine_crash_shutdown(struct pt_regs *regs)
>> +{
>> +       int crashing_cpu;
>> +
>> +       local_irq_disable();
>> +
>> +       crashing_cpu = smp_processor_id();
>> +       crash_save_cpu(regs, crashing_cpu);
>> +
>> +       /* shutdown non-crashing cpus */
>> +       crash_smp_send_stop();
>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>> +
>> +       pr_info("Starting crashdump kernel...\n");
>> +}
>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>> new file mode 100644
>> index 000000000000..13e5d2f7870d
>> --- /dev/null
>> +++ b/arch/loongarch/kernel/crash_dump.c
>> @@ -0,0 +1,19 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include <linux/highmem.h>
>> +#include <linux/crash_dump.h>
>> +#include <linux/io.h>
>> +
>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>> +                        size_t csize, unsigned long offset)
>> +{
>> +       void  *vaddr;
>> +
>> +       if (!csize)
>> +               return 0;
>> +
>> +       vaddr = kmap_local_pfn(pfn);
>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>> +       kunmap_local(vaddr);
>> +
>> +       return csize;
>> +}
>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>> --- a/arch/loongarch/kernel/machine_kexec.c
>> +++ b/arch/loongarch/kernel/machine_kexec.c
>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>                 continue;
>>         }
>>
>> -       /* kexec need a safe page to save reboot_code_buffer */
>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>
>>         reboot_code_buffer =
>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>
>>         kexec_reboot();
>>  }
>> -
>> -void machine_crash_shutdown(struct pt_regs *regs)
>> -{
>> -}
>>  #endif
>>
>>  void machine_shutdown(void)
>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>
>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>
>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>
>>         /*
>>          * The generic kexec code builds a page list with physical
>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>
>>         /*
>>          * We know we were online, and there will be no incoming IPIs at
>> -        * this point.
>> +        * this point. Mark online again before rebooting so that the crash
>> +        * analysis tool will see us correctly.
>>          */
>>         set_cpu_online(smp_processor_id(), true);
>>
>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>> index 7423361b0ebc..c6def6ff81c8 100644
>> --- a/arch/loongarch/kernel/mem.c
>> +++ b/arch/loongarch/kernel/mem.c
>> @@ -5,6 +5,7 @@
>>  #include <linux/efi.h>
>>  #include <linux/initrd.h>
>>  #include <linux/memblock.h>
>> +#include <linux/of_fdt.h>
>>
>>  #include <asm/bootinfo.h>
>>  #include <asm/loongson.h>
>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>
>>         /* Reserve the initrd */
>>         reserve_initrd_mem();
>> +
>> +       /* Mainly reserved memory for the elf core head */
>> +       early_init_fdt_scan_reserved_mem();
>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
>> +       early_init_dt_check_for_usable_mem_range();
>>  }
>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>> --- a/arch/loongarch/kernel/relocate_kernel.S
>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>         move            s2, a2
>>         move            s3, a3
>>
>> +       /*
>> +        * In case of a kdump/crash kernel, the indirection page is not
>> +        * populated as the kernel is directly copied to a reserved location
>> +        */
>> +       beqz            s2, done
>> +
>>  process_entry:
>>         PTR_L           s4, s2, 0
>>         PTR_ADDI        s2, s2, SZREG
>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>> index f938aae3e92c..ea34b77e402f 100644
>> --- a/arch/loongarch/kernel/setup.c
>> +++ b/arch/loongarch/kernel/setup.c
>> @@ -19,6 +19,8 @@
>>  #include <linux/memblock.h>
>>  #include <linux/initrd.h>
>>  #include <linux/ioport.h>
>> +#include <linux/kexec.h>
>> +#include <linux/crash_dump.h>
>>  #include <linux/root_dev.h>
>>  #include <linux/console.h>
>>  #include <linux/pfn.h>
>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>  }
>>  early_param("mem", early_parse_mem);
>>
>> +static void __init loongarch_parse_crashkernel(void)
>> +{
>> +#ifdef CONFIG_KEXEC
>> +       unsigned long long start;
>> +       unsigned long long total_mem;
>> +       unsigned long long crash_size, crash_base;
>> +       int ret;
>> +
>> +       total_mem = memblock_phys_mem_size();
>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>> +                               &crash_size, &crash_base);
>> +       if (ret != 0 || crash_size <= 0)
>> +               return;
>> +
>> +
>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>> +                                       crash_base + crash_size);
>> +       if (start != crash_base) {
>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>> +               return;
>> +       }
>> +
>> +       crashk_res.start = crash_base;
>> +       crashk_res.end   = crash_base + crash_size - 1;
>> +#endif
>> +}
>> +
>> +static void __init request_crashkernel(struct resource *res)
>> +{
>> +#ifdef CONFIG_KEXEC
>> +       int ret;
>> +
>> +       if (crashk_res.start == crashk_res.end)
>> +               return;
>> +
>> +       ret = request_resource(res, &crashk_res);
>> +       if (!ret)
>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>> +                       (unsigned long)((crashk_res.end -
>> +                                        crashk_res.start + 1) >> 20),
>> +                       (unsigned long)(crashk_res.start  >> 20));
>> +#endif
>> +}
>> +
>>  void __init platform_init(void)
>>  {
>>         efi_init();
>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>
>>         check_kernel_sections_mem();
>>
>> +       loongarch_parse_crashkernel();
>> +
>>         /*
>>          * In order to reduce the possibility of kernel panic when failed to
>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>                 request_resource(res, &code_resource);
>>                 request_resource(res, &data_resource);
>>                 request_resource(res, &bss_resource);
>> +               request_crashkernel(res);
>>         }
>>  }
>>
>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>> index aa1c95aaf595..0e610872f3f4 100644
>> --- a/arch/loongarch/kernel/traps.c
>> +++ b/arch/loongarch/kernel/traps.c
>> @@ -10,6 +10,7 @@
>>  #include <linux/entry-common.h>
>>  #include <linux/init.h>
>>  #include <linux/kernel.h>
>> +#include <linux/kexec.h>
>>  #include <linux/module.h>
>>  #include <linux/extable.h>
>>  #include <linux/mm.h>
>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>
>>         oops_exit();
>>
>> +       if (regs && kexec_should_crash(current))
>> +               crash_kexec(regs);
>> +
>>         if (in_interrupt())
>>                 panic("Fatal exception in interrupt");
>>
>> --
>> 2.36.0
>>
Re: [PATCH 2/3] LoongArch: Add kdump support
Posted by Huacai Chen 3 years, 7 months ago
Hi, Youling,

On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Huacai
>
> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> > Hi, Youling,
> >
> > I think crash.c can be merged into crash_dump.c
>
> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> I'm not sure if merging crash.c into crash_dump.c will break its
> consistency?
>
> Thanks,
> Youling
Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
can be merged into machine_kexec.c, as arm64 and riscv do.

Huacai
>
> >
> > Huacai
> >
> > On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >> This patch adds support for kdump, the kernel will reserve a region
> >> for the crash kernel and jump there on panic.
> >>
> >> Arch-specific functions are added to allow for implementing a crash
> >> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
> >>
> >> A user space tool, like kexec-tools, is responsible for allocating a
> >> separate region for the core's ELF header within crash kdump kernel
> >> memory and filling it in when executing kexec_load().
> >>
> >> Then, its location will be advertised to crash dump kernel via a new
> >> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>
> >> At the same time, it will also limit the crash kdump kernel to the
> >> crashkernel area via a new device-tree property, "linux,usable-memory-range",
> >> so as not to destroy the original kernel dump data.
> >>
> >> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >> with copy_oldmem_page().
> >>
> >> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> >> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
> >> it by triggering a crash through /proc/sysrq-trigger:
> >>
> >>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>  # echo c > /proc/sysrq-trigger
> >>
> >> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >> ---
> >>  arch/loongarch/Kconfig                  |  22 ++++++
> >>  arch/loongarch/Makefile                 |   4 +
> >>  arch/loongarch/kernel/Makefile          |   3 +-
> >>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
> >>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
> >>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
> >>  arch/loongarch/kernel/mem.c             |   6 ++
> >>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
> >>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
> >>  arch/loongarch/kernel/traps.c           |   4 +
> >>  10 files changed, 217 insertions(+), 8 deletions(-)
> >>  create mode 100644 arch/loongarch/kernel/crash.c
> >>  create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>
> >> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >> index 903c82fa958d..7c1b07a5b5bd 100644
> >> --- a/arch/loongarch/Kconfig
> >> +++ b/arch/loongarch/Kconfig
> >> @@ -420,6 +420,28 @@ config KEXEC
> >>
> >>           The name comes from the similarity to the exec system call.
> >>
> >> +config CRASH_DUMP
> >> +       bool "Build kdump crash kernel"
> >> +       help
> >> +         Generate crash dump after being started by kexec. This should
> >> +         be normally only set in special crash dump kernels which are
> >> +         loaded in the main kernel with kexec-tools into a specially
> >> +         reserved region and then later executed after a crash by
> >> +         kdump/kexec.
> >> +
> >> +         For more details see Documentation/admin-guide/kdump/kdump.rst
> >> +
> >> +config PHYSICAL_START
> >> +       hex "Physical address where the kernel is loaded"
> >> +       default "0x9000000091000000" if 64BIT
> >> +       depends on CRASH_DUMP
> >> +       help
> >> +         This gives the XKPRANGE address where the kernel is loaded.
> >> +         If you plan to use kernel for capturing the crash dump change
> >> +         this value to start of the reserved region (the "X" value as
> >> +         specified in the "crashkernel=YM@XM" command line boot parameter
> >> +         passed to the panic-ed kernel).
> >> +
> >>  config SECCOMP
> >>         bool "Enable seccomp to safely compute untrusted bytecode"
> >>         depends on PROC_FS
> >> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >> index 4bc47f47cfd8..7dabd580426d 100644
> >> --- a/arch/loongarch/Makefile
> >> +++ b/arch/loongarch/Makefile
> >> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>  cflags-y += -ffreestanding
> >>  cflags-y += $(call cc-option, -mno-check-zero-division)
> >>
> >> +ifdef CONFIG_PHYSICAL_START
> >> +load-y         = $(CONFIG_PHYSICAL_START)
> >> +else
> >>  load-y         = 0x9000000000200000
> >> +endif
> >>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>
> >>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
> >> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >> index 20b64ac3f128..df5aea129364 100644
> >> --- a/arch/loongarch/kernel/Makefile
> >> +++ b/arch/loongarch/kernel/Makefile
> >> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
> >>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
> >>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
> >>
> >> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> >> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
> >> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
> >>
> >>  obj-$(CONFIG_PROC_FS)          += proc.o
> >>
> >> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >> new file mode 100644
> >> index 000000000000..b4f249ec6301
> >> --- /dev/null
> >> +++ b/arch/loongarch/kernel/crash.c
> >> @@ -0,0 +1,100 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +/*
> >> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >> + *
> >> + * Derived from MIPS
> >> + */
> >> +#include <linux/kernel.h>
> >> +#include <linux/smp.h>
> >> +#include <linux/reboot.h>
> >> +#include <linux/crash_dump.h>
> >> +#include <linux/delay.h>
> >> +#include <linux/irq.h>
> >> +#include <linux/types.h>
> >> +#include <linux/sched.h>
> >> +#include <linux/sched/task_stack.h>
> >> +#include <asm/cacheflush.h>
> >> +#include <asm/kexec.h>
> >> +
> >> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >> +
> >> +#ifdef CONFIG_SMP
> >> +static void crash_shutdown_secondary(void *passed_regs)
> >> +{
> >> +       struct pt_regs *regs = passed_regs;
> >> +       int cpu = smp_processor_id();
> >> +
> >> +       /*
> >> +        * If we are passed registers, use those.  Otherwise get the
> >> +        * regs from the last interrupt, which should be correct, as
> >> +        * we are in an interrupt.  But if the regs are not there,
> >> +        * pull them from the top of the stack.  They are probably
> >> +        * wrong, but we need something to keep from crashing again.
> >> +        */
> >> +       if (!regs)
> >> +               regs = get_irq_regs();
> >> +       if (!regs)
> >> +               regs = task_pt_regs(current);
> >> +
> >> +       local_irq_disable();
> >> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >> +               crash_save_cpu(regs, cpu);
> >> +       cpumask_set_cpu(cpu, &cpus_in_crash);
> >> +
> >> +       while (!atomic_read(&kexec_ready_to_reboot))
> >> +               cpu_relax();
> >> +
> >> +       kexec_reboot();
> >> +}
> >> +
> >> +/* Override the weak function in kernel/panic.c */
> >> +void crash_smp_send_stop(void)
> >> +{
> >> +       static int cpus_stopped;
> >> +       unsigned long timeout;
> >> +       unsigned int ncpus;
> >> +
> >> +       /*
> >> +        * This function can be called twice in panic path, but obviously
> >> +        * we execute this only once.
> >> +        */
> >> +       if (cpus_stopped)
> >> +               return;
> >> +
> >> +       cpus_stopped = 1;
> >> +
> >> +        /* Excluding the panic cpu */
> >> +       ncpus = num_online_cpus() - 1;
> >> +
> >> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
> >> +       smp_wmb();
> >> +
> >> +       /*
> >> +        * The crash CPU sends an IPI and wait for other CPUs to
> >> +        * respond. Delay of at least 10 seconds.
> >> +        */
> >> +       pr_emerg("Sending IPI to other cpus...\n");
> >> +       timeout = USEC_PER_SEC * 10;
> >> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >> +               cpu_relax();
> >> +               udelay(1);
> >> +       }
> >> +}
> >> +
> >> +#endif
> >> +
> >> +void machine_crash_shutdown(struct pt_regs *regs)
> >> +{
> >> +       int crashing_cpu;
> >> +
> >> +       local_irq_disable();
> >> +
> >> +       crashing_cpu = smp_processor_id();
> >> +       crash_save_cpu(regs, crashing_cpu);
> >> +
> >> +       /* shutdown non-crashing cpus */
> >> +       crash_smp_send_stop();
> >> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >> +
> >> +       pr_info("Starting crashdump kernel...\n");
> >> +}
> >> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >> new file mode 100644
> >> index 000000000000..13e5d2f7870d
> >> --- /dev/null
> >> +++ b/arch/loongarch/kernel/crash_dump.c
> >> @@ -0,0 +1,19 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +#include <linux/highmem.h>
> >> +#include <linux/crash_dump.h>
> >> +#include <linux/io.h>
> >> +
> >> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >> +                        size_t csize, unsigned long offset)
> >> +{
> >> +       void  *vaddr;
> >> +
> >> +       if (!csize)
> >> +               return 0;
> >> +
> >> +       vaddr = kmap_local_pfn(pfn);
> >> +       csize = copy_to_iter(vaddr + offset, csize, iter);
> >> +       kunmap_local(vaddr);
> >> +
> >> +       return csize;
> >> +}
> >> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >> --- a/arch/loongarch/kernel/machine_kexec.c
> >> +++ b/arch/loongarch/kernel/machine_kexec.c
> >> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>                 continue;
> >>         }
> >>
> >> -       /* kexec need a safe page to save reboot_code_buffer */
> >> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>
> >>         reboot_code_buffer =
> >> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>
> >>         kexec_reboot();
> >>  }
> >> -
> >> -void machine_crash_shutdown(struct pt_regs *regs)
> >> -{
> >> -}
> >>  #endif
> >>
> >>  void machine_shutdown(void)
> >> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>
> >>         jump_addr = (unsigned long)phys_to_virt(image->start);
> >>
> >> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >> +       if (image->type == KEXEC_TYPE_DEFAULT)
> >> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>
> >>         /*
> >>          * The generic kexec code builds a page list with physical
> >> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>
> >>         /*
> >>          * We know we were online, and there will be no incoming IPIs at
> >> -        * this point.
> >> +        * this point. Mark online again before rebooting so that the crash
> >> +        * analysis tool will see us correctly.
> >>          */
> >>         set_cpu_online(smp_processor_id(), true);
> >>
> >> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >> index 7423361b0ebc..c6def6ff81c8 100644
> >> --- a/arch/loongarch/kernel/mem.c
> >> +++ b/arch/loongarch/kernel/mem.c
> >> @@ -5,6 +5,7 @@
> >>  #include <linux/efi.h>
> >>  #include <linux/initrd.h>
> >>  #include <linux/memblock.h>
> >> +#include <linux/of_fdt.h>
> >>
> >>  #include <asm/bootinfo.h>
> >>  #include <asm/loongson.h>
> >> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>
> >>         /* Reserve the initrd */
> >>         reserve_initrd_mem();
> >> +
> >> +       /* Mainly reserved memory for the elf core head */
> >> +       early_init_fdt_scan_reserved_mem();
> >> +       /* Parse linux,usable-memory-range is for crash dump kernel */
> >> +       early_init_dt_check_for_usable_mem_range();
> >>  }
> >> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >> --- a/arch/loongarch/kernel/relocate_kernel.S
> >> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>         move            s2, a2
> >>         move            s3, a3
> >>
> >> +       /*
> >> +        * In case of a kdump/crash kernel, the indirection page is not
> >> +        * populated as the kernel is directly copied to a reserved location
> >> +        */
> >> +       beqz            s2, done
> >> +
> >>  process_entry:
> >>         PTR_L           s4, s2, 0
> >>         PTR_ADDI        s2, s2, SZREG
> >> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >> index f938aae3e92c..ea34b77e402f 100644
> >> --- a/arch/loongarch/kernel/setup.c
> >> +++ b/arch/loongarch/kernel/setup.c
> >> @@ -19,6 +19,8 @@
> >>  #include <linux/memblock.h>
> >>  #include <linux/initrd.h>
> >>  #include <linux/ioport.h>
> >> +#include <linux/kexec.h>
> >> +#include <linux/crash_dump.h>
> >>  #include <linux/root_dev.h>
> >>  #include <linux/console.h>
> >>  #include <linux/pfn.h>
> >> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>  }
> >>  early_param("mem", early_parse_mem);
> >>
> >> +static void __init loongarch_parse_crashkernel(void)
> >> +{
> >> +#ifdef CONFIG_KEXEC
> >> +       unsigned long long start;
> >> +       unsigned long long total_mem;
> >> +       unsigned long long crash_size, crash_base;
> >> +       int ret;
> >> +
> >> +       total_mem = memblock_phys_mem_size();
> >> +       ret = parse_crashkernel(boot_command_line, total_mem,
> >> +                               &crash_size, &crash_base);
> >> +       if (ret != 0 || crash_size <= 0)
> >> +               return;
> >> +
> >> +
> >> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >> +                                       crash_base + crash_size);
> >> +       if (start != crash_base) {
> >> +               pr_warn("Invalid memory region reserved for crash kernel\n");
> >> +               return;
> >> +       }
> >> +
> >> +       crashk_res.start = crash_base;
> >> +       crashk_res.end   = crash_base + crash_size - 1;
> >> +#endif
> >> +}
> >> +
> >> +static void __init request_crashkernel(struct resource *res)
> >> +{
> >> +#ifdef CONFIG_KEXEC
> >> +       int ret;
> >> +
> >> +       if (crashk_res.start == crashk_res.end)
> >> +               return;
> >> +
> >> +       ret = request_resource(res, &crashk_res);
> >> +       if (!ret)
> >> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >> +                       (unsigned long)((crashk_res.end -
> >> +                                        crashk_res.start + 1) >> 20),
> >> +                       (unsigned long)(crashk_res.start  >> 20));
> >> +#endif
> >> +}
> >> +
> >>  void __init platform_init(void)
> >>  {
> >>         efi_init();
> >> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>
> >>         check_kernel_sections_mem();
> >>
> >> +       loongarch_parse_crashkernel();
> >> +
> >>         /*
> >>          * In order to reduce the possibility of kernel panic when failed to
> >>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>                 request_resource(res, &code_resource);
> >>                 request_resource(res, &data_resource);
> >>                 request_resource(res, &bss_resource);
> >> +               request_crashkernel(res);
> >>         }
> >>  }
> >>
> >> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >> index aa1c95aaf595..0e610872f3f4 100644
> >> --- a/arch/loongarch/kernel/traps.c
> >> +++ b/arch/loongarch/kernel/traps.c
> >> @@ -10,6 +10,7 @@
> >>  #include <linux/entry-common.h>
> >>  #include <linux/init.h>
> >>  #include <linux/kernel.h>
> >> +#include <linux/kexec.h>
> >>  #include <linux/module.h>
> >>  #include <linux/extable.h>
> >>  #include <linux/mm.h>
> >> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>
> >>         oops_exit();
> >>
> >> +       if (regs && kexec_should_crash(current))
> >> +               crash_kexec(regs);
> >> +
> >>         if (in_interrupt())
> >>                 panic("Fatal exception in interrupt");
> >>
> >> --
> >> 2.36.0
> >>
>
Re: [PATCH 2/3] LoongArch: Add kdump support
Posted by Youling Tang 3 years, 7 months ago
Hi, Huacai

On 09/05/2022 09:38 AM, Huacai Chen wrote:
> Hi, Youling,
>
> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> Hi, Huacai
>>
>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>> Hi, Youling,
>>>
>>> I think crash.c can be merged into crash_dump.c
>>
>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>> I'm not sure if merging crash.c into crash_dump.c will break its
>> consistency?
>>
>> Thanks,
>> Youling
> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> can be merged into machine_kexec.c, as arm64 and riscv do.

For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
crash_smp_send_stop() is placed in smp.c. If crash.c needs to be merged
into machine_kexec.c, should crash_shutdown_secondary() and
crash_smp_send_stop() be placed in smp.c?

Youling.
>
> Huacai
>>
>>>
>>> Huacai
>>>
>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>> This patch adds support for kdump, the kernel will reserve a region
>>>> for the crash kernel and jump there on panic.
>>>>
>>>> Arch-specific functions are added to allow for implementing a crash
>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>>>>
>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>> separate region for the core's ELF header within crash kdump kernel
>>>> memory and filling it in when executing kexec_load().
>>>>
>>>> Then, its location will be advertised to crash dump kernel via a new
>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>
>>>> At the same time, it will also limit the crash kdump kernel to the
>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>>>> so as not to destroy the original kernel dump data.
>>>>
>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>> with copy_oldmem_page().
>>>>
>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
>>>> it by triggering a crash through /proc/sysrq-trigger:
>>>>
>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>  # echo c > /proc/sysrq_trigger
>>>>
>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>> ---
>>>>  arch/loongarch/Kconfig                  |  22 ++++++
>>>>  arch/loongarch/Makefile                 |   4 +
>>>>  arch/loongarch/kernel/Makefile          |   3 +-
>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>>>>  arch/loongarch/kernel/mem.c             |   6 ++
>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>>>>  arch/loongarch/kernel/traps.c           |   4 +
>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
>>>>  create mode 100644 arch/loongarch/kernel/crash.c
>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>
>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>> --- a/arch/loongarch/Kconfig
>>>> +++ b/arch/loongarch/Kconfig
>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>
>>>>           The name comes from the similarity to the exec system call.
>>>>
>>>> +config CRASH_DUMP
>>>> +       bool "Build kdump crash kernel"
>>>> +       help
>>>> +         Generate crash dump after being started by kexec. This should
>>>> +         be normally only set in special crash dump kernels which are
>>>> +         loaded in the main kernel with kexec-tools into a specially
>>>> +         reserved region and then later executed after a crash by
>>>> +         kdump/kexec.
>>>> +
>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>>>> +
>>>> +config PHYSICAL_START
>>>> +       hex "Physical address where the kernel is loaded"
>>>> +       default "0x9000000091000000" if 64BIT
>>>> +       depends on CRASH_DUMP
>>>> +       help
>>>> +         This gives the XKPRANGE address where the kernel is loaded.
>>>> +         If you plan to use kernel for capturing the crash dump change
>>>> +         this value to start of the reserved region (the "X" value as
>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
>>>> +         passed to the panic-ed kernel).
>>>> +
>>>>  config SECCOMP
>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>>>         depends on PROC_FS
>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>> --- a/arch/loongarch/Makefile
>>>> +++ b/arch/loongarch/Makefile
>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>  cflags-y += -ffreestanding
>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>
>>>> +ifdef CONFIG_PHYSICAL_START
>>>> +load-y         = $(CONFIG_PHYSICAL_START)
>>>> +else
>>>>  load-y         = 0x9000000000200000
>>>> +endif
>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>
>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>> index 20b64ac3f128..df5aea129364 100644
>>>> --- a/arch/loongarch/kernel/Makefile
>>>> +++ b/arch/loongarch/kernel/Makefile
>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>>>
>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>>>
>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>>>
>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>> new file mode 100644
>>>> index 000000000000..b4f249ec6301
>>>> --- /dev/null
>>>> +++ b/arch/loongarch/kernel/crash.c
>>>> @@ -0,0 +1,100 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/*
>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>> + *
>>>> + * Derived from MIPS
>>>> + */
>>>> +#include <linux/kernel.h>
>>>> +#include <linux/smp.h>
>>>> +#include <linux/reboot.h>
>>>> +#include <linux/crash_dump.h>
>>>> +#include <linux/delay.h>
>>>> +#include <linux/irq.h>
>>>> +#include <linux/types.h>
>>>> +#include <linux/sched.h>
>>>> +#include <linux/sched/task_stack.h>
>>>> +#include <asm/cacheflush.h>
>>>> +#include <asm/kexec.h>
>>>> +
>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>> +
>>>> +#ifdef CONFIG_SMP
>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>> +{
>>>> +       struct pt_regs *regs = passed_regs;
>>>> +       int cpu = smp_processor_id();
>>>> +
>>>> +       /*
>>>> +        * If we are passed registers, use those.  Otherwise get the
>>>> +        * regs from the last interrupt, which should be correct, as
>>>> +        * we are in an interrupt.  But if the regs are not there,
>>>> +        * pull them from the top of the stack.  They are probably
>>>> +        * wrong, but we need something to keep from crashing again.
>>>> +        */
>>>> +       if (!regs)
>>>> +               regs = get_irq_regs();
>>>> +       if (!regs)
>>>> +               regs = task_pt_regs(current);
>>>> +
>>>> +       local_irq_disable();
>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>> +               crash_save_cpu(regs, cpu);
>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>>>> +
>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
>>>> +               cpu_relax();
>>>> +
>>>> +       kexec_reboot();
>>>> +}
>>>> +
>>>> +/* Override the weak function in kernel/panic.c */
>>>> +void crash_smp_send_stop(void)
>>>> +{
>>>> +       static int cpus_stopped;
>>>> +       unsigned long timeout;
>>>> +       unsigned int ncpus;
>>>> +
>>>> +       /*
>>>> +        * This function can be called twice in panic path, but obviously
>>>> +        * we execute this only once.
>>>> +        */
>>>> +       if (cpus_stopped)
>>>> +               return;
>>>> +
>>>> +       cpus_stopped = 1;
>>>> +
>>>> +        /* Excluding the panic cpu */
>>>> +       ncpus = num_online_cpus() - 1;
>>>> +
>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>> +       smp_wmb();
>>>> +
>>>> +       /*
>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
>>>> +        * respond. Delay of at least 10 seconds.
>>>> +        */
>>>> +       pr_emerg("Sending IPI to other cpus...\n");
>>>> +       timeout = USEC_PER_SEC * 10;
>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>> +               cpu_relax();
>>>> +               udelay(1);
>>>> +       }
>>>> +}
>>>> +
>>>> +#endif
>>>> +
>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>> +{
>>>> +       int crashing_cpu;
>>>> +
>>>> +       local_irq_disable();
>>>> +
>>>> +       crashing_cpu = smp_processor_id();
>>>> +       crash_save_cpu(regs, crashing_cpu);
>>>> +
>>>> +       /* shutdown non-crashing cpus */
>>>> +       crash_smp_send_stop();
>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>> +
>>>> +       pr_info("Starting crashdump kernel...\n");
>>>> +}
>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>> new file mode 100644
>>>> index 000000000000..13e5d2f7870d
>>>> --- /dev/null
>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>> @@ -0,0 +1,19 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +#include <linux/highmem.h>
>>>> +#include <linux/crash_dump.h>
>>>> +#include <linux/io.h>
>>>> +
>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>> +                        size_t csize, unsigned long offset)
>>>> +{
>>>> +       void  *vaddr;
>>>> +
>>>> +       if (!csize)
>>>> +               return 0;
>>>> +
>>>> +       vaddr = kmap_local_pfn(pfn);
>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>>>> +       kunmap_local(vaddr);
>>>> +
>>>> +       return csize;
>>>> +}
>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>                 continue;
>>>>         }
>>>>
>>>> -       /* kexec need a safe page to save reboot_code_buffer */
>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>
>>>>         reboot_code_buffer =
>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>
>>>>         kexec_reboot();
>>>>  }
>>>> -
>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>> -{
>>>> -}
>>>>  #endif
>>>>
>>>>  void machine_shutdown(void)
>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>
>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>
>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>
>>>>         /*
>>>>          * The generic kexec code builds a page list with physical
>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>
>>>>         /*
>>>>          * We know we were online, and there will be no incoming IPIs at
>>>> -        * this point.
>>>> +        * this point. Mark online again before rebooting so that the crash
>>>> +        * analysis tool will see us correctly.
>>>>          */
>>>>         set_cpu_online(smp_processor_id(), true);
>>>>
>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>> --- a/arch/loongarch/kernel/mem.c
>>>> +++ b/arch/loongarch/kernel/mem.c
>>>> @@ -5,6 +5,7 @@
>>>>  #include <linux/efi.h>
>>>>  #include <linux/initrd.h>
>>>>  #include <linux/memblock.h>
>>>> +#include <linux/of_fdt.h>
>>>>
>>>>  #include <asm/bootinfo.h>
>>>>  #include <asm/loongson.h>
>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>
>>>>         /* Reserve the initrd */
>>>>         reserve_initrd_mem();
>>>> +
>>>> +       /* Mainly reserved memory for the elf core head */
>>>> +       early_init_fdt_scan_reserved_mem();
>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
>>>> +       early_init_dt_check_for_usable_mem_range();
>>>>  }
>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>         move            s2, a2
>>>>         move            s3, a3
>>>>
>>>> +       /*
>>>> +        * In case of a kdump/crash kernel, the indirection page is not
>>>> +        * populated as the kernel is directly copied to a reserved location
>>>> +        */
>>>> +       beqz            s2, done
>>>> +
>>>>  process_entry:
>>>>         PTR_L           s4, s2, 0
>>>>         PTR_ADDI        s2, s2, SZREG
>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>> index f938aae3e92c..ea34b77e402f 100644
>>>> --- a/arch/loongarch/kernel/setup.c
>>>> +++ b/arch/loongarch/kernel/setup.c
>>>> @@ -19,6 +19,8 @@
>>>>  #include <linux/memblock.h>
>>>>  #include <linux/initrd.h>
>>>>  #include <linux/ioport.h>
>>>> +#include <linux/kexec.h>
>>>> +#include <linux/crash_dump.h>
>>>>  #include <linux/root_dev.h>
>>>>  #include <linux/console.h>
>>>>  #include <linux/pfn.h>
>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>  }
>>>>  early_param("mem", early_parse_mem);
>>>>
>>>> +static void __init loongarch_parse_crashkernel(void)
>>>> +{
>>>> +#ifdef CONFIG_KEXEC
>>>> +       unsigned long long start;
>>>> +       unsigned long long total_mem;
>>>> +       unsigned long long crash_size, crash_base;
>>>> +       int ret;
>>>> +
>>>> +       total_mem = memblock_phys_mem_size();
>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>>>> +                               &crash_size, &crash_base);
>>>> +       if (ret != 0 || crash_size <= 0)
>>>> +               return;
>>>> +
>>>> +
>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>> +                                       crash_base + crash_size);
>>>> +       if (start != crash_base) {
>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       crashk_res.start = crash_base;
>>>> +       crashk_res.end   = crash_base + crash_size - 1;
>>>> +#endif
>>>> +}
>>>> +
>>>> +static void __init request_crashkernel(struct resource *res)
>>>> +{
>>>> +#ifdef CONFIG_KEXEC
>>>> +       int ret;
>>>> +
>>>> +       if (crashk_res.start == crashk_res.end)
>>>> +               return;
>>>> +
>>>> +       ret = request_resource(res, &crashk_res);
>>>> +       if (!ret)
>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>> +                       (unsigned long)((crashk_res.end -
>>>> +                                        crashk_res.start + 1) >> 20),
>>>> +                       (unsigned long)(crashk_res.start  >> 20));
>>>> +#endif
>>>> +}
>>>> +
>>>>  void __init platform_init(void)
>>>>  {
>>>>         efi_init();
>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>
>>>>         check_kernel_sections_mem();
>>>>
>>>> +       loongarch_parse_crashkernel();
>>>> +
>>>>         /*
>>>>          * In order to reduce the possibility of kernel panic when failed to
>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>                 request_resource(res, &code_resource);
>>>>                 request_resource(res, &data_resource);
>>>>                 request_resource(res, &bss_resource);
>>>> +               request_crashkernel(res);
>>>>         }
>>>>  }
>>>>
>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>> --- a/arch/loongarch/kernel/traps.c
>>>> +++ b/arch/loongarch/kernel/traps.c
>>>> @@ -10,6 +10,7 @@
>>>>  #include <linux/entry-common.h>
>>>>  #include <linux/init.h>
>>>>  #include <linux/kernel.h>
>>>> +#include <linux/kexec.h>
>>>>  #include <linux/module.h>
>>>>  #include <linux/extable.h>
>>>>  #include <linux/mm.h>
>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>
>>>>         oops_exit();
>>>>
>>>> +       if (regs && kexec_should_crash(current))
>>>> +               crash_kexec(regs);
>>>> +
>>>>         if (in_interrupt())
>>>>                 panic("Fatal exception in interrupt");
>>>>
>>>> --
>>>> 2.36.0
>>>>
>>
Re: [PATCH 2/3] LoongArch: Add kdump support
Posted by Huacai Chen 3 years, 7 months ago
On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Huacai
>
> On 09/05/2022 09:38 AM, Huacai Chen wrote:
> > Hi, Youling,
> >
> > On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >> Hi, Huacai
> >>
> >> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> >>> Hi, Youling,
> >>>
> >>> I think crash.c can be merged into crash_dump.c
> >>
> >> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> >> I'm not sure if merging crash.c into crash_dump.c will break its
> >> consistency?
> >>
> >> Thanks,
> >> Youling
> > Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> > can be merged into machine_kexec.c, as arm64 and riscv do.
>
> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
> into machine_kexec.c, should crash_shutdown_secondary and
> crash_smp_send_stop be placed in smp.c?
I don't want to touch smp.c; merging everything into machine_kexec.c seems reasonable.

Huacai
>
> Youling.
> >
> > Huacai
> >>
> >>>
> >>> Huacai
> >>>
> >>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>
> >>>> This patch adds support for kdump, the kernel will reserve a region
> >>>> for the crash kernel and jump there on panic.
> >>>>
> >>>> Arch-specific functions are added to allow for implementing a crash
> >>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
> >>>>
> >>>> A user space tool, like kexec-tools, is responsible for allocating a
> >>>> separate region for the core's ELF header within crash kdump kernel
> >>>> memory and filling it in when executing kexec_load().
> >>>>
> >>>> Then, its location will be advertised to crash dump kernel via a new
> >>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>>>
> >>>> At the same time, it will also limit the crash kdump kernel to the
> >>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
> >>>> so as not to destroy the original kernel dump data.
> >>>>
> >>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >>>> with copy_oldmem_page().
> >>>>
> >>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> >>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
> >>>> it by triggering a crash through /proc/sysrq-trigger:
> >>>>
> >>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>>>  # echo c > /proc/sysrq-trigger
> >>>>
> >>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >>>> ---
> >>>>  arch/loongarch/Kconfig                  |  22 ++++++
> >>>>  arch/loongarch/Makefile                 |   4 +
> >>>>  arch/loongarch/kernel/Makefile          |   3 +-
> >>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
> >>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
> >>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
> >>>>  arch/loongarch/kernel/mem.c             |   6 ++
> >>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
> >>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
> >>>>  arch/loongarch/kernel/traps.c           |   4 +
> >>>>  10 files changed, 217 insertions(+), 8 deletions(-)
> >>>>  create mode 100644 arch/loongarch/kernel/crash.c
> >>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>>>
> >>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >>>> index 903c82fa958d..7c1b07a5b5bd 100644
> >>>> --- a/arch/loongarch/Kconfig
> >>>> +++ b/arch/loongarch/Kconfig
> >>>> @@ -420,6 +420,28 @@ config KEXEC
> >>>>
> >>>>           The name comes from the similarity to the exec system call.
> >>>>
> >>>> +config CRASH_DUMP
> >>>> +       bool "Build kdump crash kernel"
> >>>> +       help
> >>>> +         Generate crash dump after being started by kexec. This should
> >>>> +         be normally only set in special crash dump kernels which are
> >>>> +         loaded in the main kernel with kexec-tools into a specially
> >>>> +         reserved region and then later executed after a crash by
> >>>> +         kdump/kexec.
> >>>> +
> >>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
> >>>> +
> >>>> +config PHYSICAL_START
> >>>> +       hex "Physical address where the kernel is loaded"
> >>>> +       default "0x9000000091000000" if 64BIT
> >>>> +       depends on CRASH_DUMP
> >>>> +       help
> >>>> +         This gives the XKPRANGE address where the kernel is loaded.
> >>>> +         If you plan to use kernel for capturing the crash dump change
> >>>> +         this value to start of the reserved region (the "X" value as
> >>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
> >>>> +         passed to the panic-ed kernel).
> >>>> +
> >>>>  config SECCOMP
> >>>>         bool "Enable seccomp to safely compute untrusted bytecode"
> >>>>         depends on PROC_FS
> >>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >>>> index 4bc47f47cfd8..7dabd580426d 100644
> >>>> --- a/arch/loongarch/Makefile
> >>>> +++ b/arch/loongarch/Makefile
> >>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>>>  cflags-y += -ffreestanding
> >>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
> >>>>
> >>>> +ifdef CONFIG_PHYSICAL_START
> >>>> +load-y         = $(CONFIG_PHYSICAL_START)
> >>>> +else
> >>>>  load-y         = 0x9000000000200000
> >>>> +endif
> >>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>>>
> >>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
> >>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >>>> index 20b64ac3f128..df5aea129364 100644
> >>>> --- a/arch/loongarch/kernel/Makefile
> >>>> +++ b/arch/loongarch/kernel/Makefile
> >>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
> >>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
> >>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
> >>>>
> >>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> >>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
> >>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
> >>>>
> >>>>  obj-$(CONFIG_PROC_FS)          += proc.o
> >>>>
> >>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >>>> new file mode 100644
> >>>> index 000000000000..b4f249ec6301
> >>>> --- /dev/null
> >>>> +++ b/arch/loongarch/kernel/crash.c
> >>>> @@ -0,0 +1,100 @@
> >>>> +// SPDX-License-Identifier: GPL-2.0
> >>>> +/*
> >>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >>>> + *
> >>>> + * Derived from MIPS
> >>>> + */
> >>>> +#include <linux/kernel.h>
> >>>> +#include <linux/smp.h>
> >>>> +#include <linux/reboot.h>
> >>>> +#include <linux/crash_dump.h>
> >>>> +#include <linux/delay.h>
> >>>> +#include <linux/irq.h>
> >>>> +#include <linux/types.h>
> >>>> +#include <linux/sched.h>
> >>>> +#include <linux/sched/task_stack.h>
> >>>> +#include <asm/cacheflush.h>
> >>>> +#include <asm/kexec.h>
> >>>> +
> >>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >>>> +
> >>>> +#ifdef CONFIG_SMP
> >>>> +static void crash_shutdown_secondary(void *passed_regs)
> >>>> +{
> >>>> +       struct pt_regs *regs = passed_regs;
> >>>> +       int cpu = smp_processor_id();
> >>>> +
> >>>> +       /*
> >>>> +        * If we are passed registers, use those.  Otherwise get the
> >>>> +        * regs from the last interrupt, which should be correct, as
> >>>> +        * we are in an interrupt.  But if the regs are not there,
> >>>> +        * pull them from the top of the stack.  They are probably
> >>>> +        * wrong, but we need something to keep from crashing again.
> >>>> +        */
> >>>> +       if (!regs)
> >>>> +               regs = get_irq_regs();
> >>>> +       if (!regs)
> >>>> +               regs = task_pt_regs(current);
> >>>> +
> >>>> +       local_irq_disable();
> >>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >>>> +               crash_save_cpu(regs, cpu);
> >>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
> >>>> +
> >>>> +       while (!atomic_read(&kexec_ready_to_reboot))
> >>>> +               cpu_relax();
> >>>> +
> >>>> +       kexec_reboot();
> >>>> +}
> >>>> +
> >>>> +/* Override the weak function in kernel/panic.c */
> >>>> +void crash_smp_send_stop(void)
> >>>> +{
> >>>> +       static int cpus_stopped;
> >>>> +       unsigned long timeout;
> >>>> +       unsigned int ncpus;
> >>>> +
> >>>> +       /*
> >>>> +        * This function can be called twice in panic path, but obviously
> >>>> +        * we execute this only once.
> >>>> +        */
> >>>> +       if (cpus_stopped)
> >>>> +               return;
> >>>> +
> >>>> +       cpus_stopped = 1;
> >>>> +
> >>>> +        /* Excluding the panic cpu */
> >>>> +       ncpus = num_online_cpus() - 1;
> >>>> +
> >>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
> >>>> +       smp_wmb();
> >>>> +
> >>>> +       /*
> >>>> +        * The crash CPU sends an IPI and wait for other CPUs to
> >>>> +        * respond. Delay of at least 10 seconds.
> >>>> +        */
> >>>> +       pr_emerg("Sending IPI to other cpus...\n");
> >>>> +       timeout = USEC_PER_SEC * 10;
> >>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >>>> +               cpu_relax();
> >>>> +               udelay(1);
> >>>> +       }
> >>>> +}
> >>>> +
> >>>> +#endif
> >>>> +
> >>>> +void machine_crash_shutdown(struct pt_regs *regs)
> >>>> +{
> >>>> +       int crashing_cpu;
> >>>> +
> >>>> +       local_irq_disable();
> >>>> +
> >>>> +       crashing_cpu = smp_processor_id();
> >>>> +       crash_save_cpu(regs, crashing_cpu);
> >>>> +
> >>>> +       /* shutdown non-crashing cpus */
> >>>> +       crash_smp_send_stop();
> >>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >>>> +
> >>>> +       pr_info("Starting crashdump kernel...\n");
> >>>> +}
> >>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >>>> new file mode 100644
> >>>> index 000000000000..13e5d2f7870d
> >>>> --- /dev/null
> >>>> +++ b/arch/loongarch/kernel/crash_dump.c
> >>>> @@ -0,0 +1,19 @@
> >>>> +// SPDX-License-Identifier: GPL-2.0
> >>>> +#include <linux/highmem.h>
> >>>> +#include <linux/crash_dump.h>
> >>>> +#include <linux/io.h>
> >>>> +
> >>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >>>> +                        size_t csize, unsigned long offset)
> >>>> +{
> >>>> +       void  *vaddr;
> >>>> +
> >>>> +       if (!csize)
> >>>> +               return 0;
> >>>> +
> >>>> +       vaddr = kmap_local_pfn(pfn);
> >>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
> >>>> +       kunmap_local(vaddr);
> >>>> +
> >>>> +       return csize;
> >>>> +}
> >>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >>>> --- a/arch/loongarch/kernel/machine_kexec.c
> >>>> +++ b/arch/loongarch/kernel/machine_kexec.c
> >>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>>>                 continue;
> >>>>         }
> >>>>
> >>>> -       /* kexec need a safe page to save reboot_code_buffer */
> >>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>>>
> >>>>         reboot_code_buffer =
> >>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>>>
> >>>>         kexec_reboot();
> >>>>  }
> >>>> -
> >>>> -void machine_crash_shutdown(struct pt_regs *regs)
> >>>> -{
> >>>> -}
> >>>>  #endif
> >>>>
> >>>>  void machine_shutdown(void)
> >>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>>>
> >>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
> >>>>
> >>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
> >>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>
> >>>>         /*
> >>>>          * The generic kexec code builds a page list with physical
> >>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>>>
> >>>>         /*
> >>>>          * We know we were online, and there will be no incoming IPIs at
> >>>> -        * this point.
> >>>> +        * this point. Mark online again before rebooting so that the crash
> >>>> +        * analysis tool will see us correctly.
> >>>>          */
> >>>>         set_cpu_online(smp_processor_id(), true);
> >>>>
> >>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >>>> index 7423361b0ebc..c6def6ff81c8 100644
> >>>> --- a/arch/loongarch/kernel/mem.c
> >>>> +++ b/arch/loongarch/kernel/mem.c
> >>>> @@ -5,6 +5,7 @@
> >>>>  #include <linux/efi.h>
> >>>>  #include <linux/initrd.h>
> >>>>  #include <linux/memblock.h>
> >>>> +#include <linux/of_fdt.h>
> >>>>
> >>>>  #include <asm/bootinfo.h>
> >>>>  #include <asm/loongson.h>
> >>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>>>
> >>>>         /* Reserve the initrd */
> >>>>         reserve_initrd_mem();
> >>>> +
> >>>> +       /* Mainly reserved memory for the elf core head */
> >>>> +       early_init_fdt_scan_reserved_mem();
> >>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
> >>>> +       early_init_dt_check_for_usable_mem_range();
> >>>>  }
> >>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >>>> --- a/arch/loongarch/kernel/relocate_kernel.S
> >>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>>>         move            s2, a2
> >>>>         move            s3, a3
> >>>>
> >>>> +       /*
> >>>> +        * In case of a kdump/crash kernel, the indirection page is not
> >>>> +        * populated as the kernel is directly copied to a reserved location
> >>>> +        */
> >>>> +       beqz            s2, done
> >>>> +
> >>>>  process_entry:
> >>>>         PTR_L           s4, s2, 0
> >>>>         PTR_ADDI        s2, s2, SZREG
> >>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >>>> index f938aae3e92c..ea34b77e402f 100644
> >>>> --- a/arch/loongarch/kernel/setup.c
> >>>> +++ b/arch/loongarch/kernel/setup.c
> >>>> @@ -19,6 +19,8 @@
> >>>>  #include <linux/memblock.h>
> >>>>  #include <linux/initrd.h>
> >>>>  #include <linux/ioport.h>
> >>>> +#include <linux/kexec.h>
> >>>> +#include <linux/crash_dump.h>
> >>>>  #include <linux/root_dev.h>
> >>>>  #include <linux/console.h>
> >>>>  #include <linux/pfn.h>
> >>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>>>  }
> >>>>  early_param("mem", early_parse_mem);
> >>>>
> >>>> +static void __init loongarch_parse_crashkernel(void)
> >>>> +{
> >>>> +#ifdef CONFIG_KEXEC
> >>>> +       unsigned long long start;
> >>>> +       unsigned long long total_mem;
> >>>> +       unsigned long long crash_size, crash_base;
> >>>> +       int ret;
> >>>> +
> >>>> +       total_mem = memblock_phys_mem_size();
> >>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
> >>>> +                               &crash_size, &crash_base);
> >>>> +       if (ret != 0 || crash_size <= 0)
> >>>> +               return;
> >>>> +
> >>>> +
> >>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >>>> +                                       crash_base + crash_size);
> >>>> +       if (start != crash_base) {
> >>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
> >>>> +               return;
> >>>> +       }
> >>>> +
> >>>> +       crashk_res.start = crash_base;
> >>>> +       crashk_res.end   = crash_base + crash_size - 1;
> >>>> +#endif
> >>>> +}
> >>>> +
> >>>> +static void __init request_crashkernel(struct resource *res)
> >>>> +{
> >>>> +#ifdef CONFIG_KEXEC
> >>>> +       int ret;
> >>>> +
> >>>> +       if (crashk_res.start == crashk_res.end)
> >>>> +               return;
> >>>> +
> >>>> +       ret = request_resource(res, &crashk_res);
> >>>> +       if (!ret)
> >>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >>>> +                       (unsigned long)((crashk_res.end -
> >>>> +                                        crashk_res.start + 1) >> 20),
> >>>> +                       (unsigned long)(crashk_res.start  >> 20));
> >>>> +#endif
> >>>> +}
> >>>> +
> >>>>  void __init platform_init(void)
> >>>>  {
> >>>>         efi_init();
> >>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>>>
> >>>>         check_kernel_sections_mem();
> >>>>
> >>>> +       loongarch_parse_crashkernel();
> >>>> +
> >>>>         /*
> >>>>          * In order to reduce the possibility of kernel panic when failed to
> >>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>>>                 request_resource(res, &code_resource);
> >>>>                 request_resource(res, &data_resource);
> >>>>                 request_resource(res, &bss_resource);
> >>>> +               request_crashkernel(res);
> >>>>         }
> >>>>  }
> >>>>
> >>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >>>> index aa1c95aaf595..0e610872f3f4 100644
> >>>> --- a/arch/loongarch/kernel/traps.c
> >>>> +++ b/arch/loongarch/kernel/traps.c
> >>>> @@ -10,6 +10,7 @@
> >>>>  #include <linux/entry-common.h>
> >>>>  #include <linux/init.h>
> >>>>  #include <linux/kernel.h>
> >>>> +#include <linux/kexec.h>
> >>>>  #include <linux/module.h>
> >>>>  #include <linux/extable.h>
> >>>>  #include <linux/mm.h>
> >>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>>>
> >>>>         oops_exit();
> >>>>
> >>>> +       if (regs && kexec_should_crash(current))
> >>>> +               crash_kexec(regs);
> >>>> +
> >>>>         if (in_interrupt())
> >>>>                 panic("Fatal exception in interrupt");
> >>>>
> >>>> --
> >>>> 2.36.0
> >>>>
> >>
>
Re: [PATCH 2/3] LoongArch: Add kdump support
Posted by Youling Tang 3 years, 7 months ago

On 09/05/2022 10:14 AM, Huacai Chen wrote:
> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> Hi, Huacai
>>
>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
>>> Hi, Youling,
>>>
>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>> Hi, Huacai
>>>>
>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>>>> Hi, Youling,
>>>>>
>>>>> I think crash.c can be merged into crash_dump.c
>>>>
>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>>>> I'm not sure if merging crash.c into crash_dump.c will break its
>>>> consistency?
>>>>
>>>> Thanks,
>>>> Youling
>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
>>> can be merged into machine_kexec.c, as arm64 and riscv do.
>>
>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
>> into machine_kexec.c, should crash_shutdown_secondary and
>> crash_smp_send_stop be placed in smp.c?
> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.

OK, I'll merge everything from crash.c into machine_kexec.c.

Youling.

>
> Huacai
>>
>> Youling.
>>>
>>> Huacai
>>>>
>>>>>
>>>>> Huacai
>>>>>
>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>
>>>>>> This patch adds support for kdump, the kernel will reserve a region
>>>>>> for the crash kernel and jump there on panic.
>>>>>>
>>>>>> Arch-specific functions are added to allow for implementing a crash
>>>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>>>>>>
>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>>>> separate region for the core's ELF header within crash kdump kernel
>>>>>> memory and filling it in when executing kexec_load().
>>>>>>
>>>>>> Then, its location will be advertised to crash dump kernel via a new
>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>>>
>>>>>> At the same time, it will also limit the crash kdump kernel to the
>>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>>>>>> so as not to destroy the original kernel dump data.
>>>>>>
>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>>>> with copy_oldmem_page().
>>>>>>
>>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test it by
>>>>>> triggering a crash through /proc/sysrq-trigger:
>>>>>>
>>>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>>>  # echo c > /proc/sysrq-trigger
>>>>>>
>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>>>> ---
>>>>>>  arch/loongarch/Kconfig                  |  22 ++++++
>>>>>>  arch/loongarch/Makefile                 |   4 +
>>>>>>  arch/loongarch/kernel/Makefile          |   3 +-
>>>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>>>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>>>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>>>>>>  arch/loongarch/kernel/mem.c             |   6 ++
>>>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>>>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>>>>>>  arch/loongarch/kernel/traps.c           |   4 +
>>>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
>>>>>>  create mode 100644 arch/loongarch/kernel/crash.c
>>>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>>>
>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>>>> --- a/arch/loongarch/Kconfig
>>>>>> +++ b/arch/loongarch/Kconfig
>>>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>>>
>>>>>>           The name comes from the similarity to the exec system call.
>>>>>>
>>>>>> +config CRASH_DUMP
>>>>>> +       bool "Build kdump crash kernel"
>>>>>> +       help
>>>>>> +         Generate crash dump after being started by kexec. This should
>>>>>> +         be normally only set in special crash dump kernels which are
>>>>>> +         loaded in the main kernel with kexec-tools into a specially
>>>>>> +         reserved region and then later executed after a crash by
>>>>>> +         kdump/kexec.
>>>>>> +
>>>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>>>>>> +
>>>>>> +config PHYSICAL_START
>>>>>> +       hex "Physical address where the kernel is loaded"
>>>>>> +       default "0x9000000091000000" if 64BIT
>>>>>> +       depends on CRASH_DUMP
>>>>>> +       help
>>>>>> +         This gives the XKPRANGE address where the kernel is loaded.
>>>>>> +         If you plan to use kernel for capturing the crash dump change
>>>>>> +         this value to start of the reserved region (the "X" value as
>>>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
>>>>>> +         passed to the panic-ed kernel).
>>>>>> +
>>>>>>  config SECCOMP
>>>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>>>>>         depends on PROC_FS
>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>>>> --- a/arch/loongarch/Makefile
>>>>>> +++ b/arch/loongarch/Makefile
>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>>>  cflags-y += -ffreestanding
>>>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>>>
>>>>>> +ifdef CONFIG_PHYSICAL_START
>>>>>> +load-y         = $(CONFIG_PHYSICAL_START)
>>>>>> +else
>>>>>>  load-y         = 0x9000000000200000
>>>>>> +endif
>>>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>>>
>>>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>>>> index 20b64ac3f128..df5aea129364 100644
>>>>>> --- a/arch/loongarch/kernel/Makefile
>>>>>> +++ b/arch/loongarch/kernel/Makefile
>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>>>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>>>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>>>>>
>>>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>>>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
>>>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>>>>>
>>>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>>>>>
>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..b4f249ec6301
>>>>>> --- /dev/null
>>>>>> +++ b/arch/loongarch/kernel/crash.c
>>>>>> @@ -0,0 +1,100 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>> +/*
>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>>>> + *
>>>>>> + * Derived from MIPS
>>>>>> + */
>>>>>> +#include <linux/kernel.h>
>>>>>> +#include <linux/smp.h>
>>>>>> +#include <linux/reboot.h>
>>>>>> +#include <linux/crash_dump.h>
>>>>>> +#include <linux/delay.h>
>>>>>> +#include <linux/irq.h>
>>>>>> +#include <linux/types.h>
>>>>>> +#include <linux/sched.h>
>>>>>> +#include <linux/sched/task_stack.h>
>>>>>> +#include <asm/cacheflush.h>
>>>>>> +#include <asm/kexec.h>
>>>>>> +
>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>>>> +
>>>>>> +#ifdef CONFIG_SMP
>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>>>> +{
>>>>>> +       struct pt_regs *regs = passed_regs;
>>>>>> +       int cpu = smp_processor_id();
>>>>>> +
>>>>>> +       /*
>>>>>> +        * If we are passed registers, use those.  Otherwise get the
>>>>>> +        * regs from the last interrupt, which should be correct, as
>>>>>> +        * we are in an interrupt.  But if the regs are not there,
>>>>>> +        * pull them from the top of the stack.  They are probably
>>>>>> +        * wrong, but we need something to keep from crashing again.
>>>>>> +        */
>>>>>> +       if (!regs)
>>>>>> +               regs = get_irq_regs();
>>>>>> +       if (!regs)
>>>>>> +               regs = task_pt_regs(current);
>>>>>> +
>>>>>> +       local_irq_disable();
>>>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>>>> +               crash_save_cpu(regs, cpu);
>>>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>>>>>> +
>>>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
>>>>>> +               cpu_relax();
>>>>>> +
>>>>>> +       kexec_reboot();
>>>>>> +}
>>>>>> +
>>>>>> +/* Override the weak function in kernel/panic.c */
>>>>>> +void crash_smp_send_stop(void)
>>>>>> +{
>>>>>> +       static int cpus_stopped;
>>>>>> +       unsigned long timeout;
>>>>>> +       unsigned int ncpus;
>>>>>> +
>>>>>> +       /*
>>>>>> +        * This function can be called twice in panic path, but obviously
>>>>>> +        * we execute this only once.
>>>>>> +        */
>>>>>> +       if (cpus_stopped)
>>>>>> +               return;
>>>>>> +
>>>>>> +       cpus_stopped = 1;
>>>>>> +
>>>>>> +        /* Excluding the panic cpu */
>>>>>> +       ncpus = num_online_cpus() - 1;
>>>>>> +
>>>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>>>> +       smp_wmb();
>>>>>> +
>>>>>> +       /*
>>>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
>>>>>> +        * respond. Delay of at least 10 seconds.
>>>>>> +        */
>>>>>> +       pr_emerg("Sending IPI to other cpus...\n");
>>>>>> +       timeout = USEC_PER_SEC * 10;
>>>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>>>> +               cpu_relax();
>>>>>> +               udelay(1);
>>>>>> +       }
>>>>>> +}
>>>>>> +
>>>>>> +#endif
>>>>>> +
>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>>>> +{
>>>>>> +       int crashing_cpu;
>>>>>> +
>>>>>> +       local_irq_disable();
>>>>>> +
>>>>>> +       crashing_cpu = smp_processor_id();
>>>>>> +       crash_save_cpu(regs, crashing_cpu);
>>>>>> +
>>>>>> +       /* shutdown non-crashing cpus */
>>>>>> +       crash_smp_send_stop();
>>>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>>>> +
>>>>>> +       pr_info("Starting crashdump kernel...\n");
>>>>>> +}
>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..13e5d2f7870d
>>>>>> --- /dev/null
>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>>>> @@ -0,0 +1,19 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>> +#include <linux/highmem.h>
>>>>>> +#include <linux/crash_dump.h>
>>>>>> +#include <linux/io.h>
>>>>>> +
>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>>>> +                        size_t csize, unsigned long offset)
>>>>>> +{
>>>>>> +       void  *vaddr;
>>>>>> +
>>>>>> +       if (!csize)
>>>>>> +               return 0;
>>>>>> +
>>>>>> +       vaddr = kmap_local_pfn(pfn);
>>>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>>>>>> +       kunmap_local(vaddr);
>>>>>> +
>>>>>> +       return csize;
>>>>>> +}
>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>>>                 continue;
>>>>>>         }
>>>>>>
>>>>>> -       /* kexec need a safe page to save reboot_code_buffer */
>>>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>>>
>>>>>>         reboot_code_buffer =
>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>>>
>>>>>>         kexec_reboot();
>>>>>>  }
>>>>>> -
>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>>>> -{
>>>>>> -}
>>>>>>  #endif
>>>>>>
>>>>>>  void machine_shutdown(void)
>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>>>
>>>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>>>
>>>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>>>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>
>>>>>>         /*
>>>>>>          * The generic kexec code builds a page list with physical
>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>>>
>>>>>>         /*
>>>>>>          * We know we were online, and there will be no incoming IPIs at
>>>>>> -        * this point.
>>>>>> +        * this point. Mark online again before rebooting so that the crash
>>>>>> +        * analysis tool will see us correctly.
>>>>>>          */
>>>>>>         set_cpu_online(smp_processor_id(), true);
>>>>>>
>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>>>> --- a/arch/loongarch/kernel/mem.c
>>>>>> +++ b/arch/loongarch/kernel/mem.c
>>>>>> @@ -5,6 +5,7 @@
>>>>>>  #include <linux/efi.h>
>>>>>>  #include <linux/initrd.h>
>>>>>>  #include <linux/memblock.h>
>>>>>> +#include <linux/of_fdt.h>
>>>>>>
>>>>>>  #include <asm/bootinfo.h>
>>>>>>  #include <asm/loongson.h>
>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>>>
>>>>>>         /* Reserve the initrd */
>>>>>>         reserve_initrd_mem();
>>>>>> +
>>>>>> +       /* Mainly reserved memory for the elf core head */
>>>>>> +       early_init_fdt_scan_reserved_mem();
>>>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
>>>>>> +       early_init_dt_check_for_usable_mem_range();
>>>>>>  }
>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>>>         move            s2, a2
>>>>>>         move            s3, a3
>>>>>>
>>>>>> +       /*
>>>>>> +        * In case of a kdump/crash kernel, the indirection page is not
>>>>>> +        * populated as the kernel is directly copied to a reserved location
>>>>>> +        */
>>>>>> +       beqz            s2, done
>>>>>> +
>>>>>>  process_entry:
>>>>>>         PTR_L           s4, s2, 0
>>>>>>         PTR_ADDI        s2, s2, SZREG
>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>>>> index f938aae3e92c..ea34b77e402f 100644
>>>>>> --- a/arch/loongarch/kernel/setup.c
>>>>>> +++ b/arch/loongarch/kernel/setup.c
>>>>>> @@ -19,6 +19,8 @@
>>>>>>  #include <linux/memblock.h>
>>>>>>  #include <linux/initrd.h>
>>>>>>  #include <linux/ioport.h>
>>>>>> +#include <linux/kexec.h>
>>>>>> +#include <linux/crash_dump.h>
>>>>>>  #include <linux/root_dev.h>
>>>>>>  #include <linux/console.h>
>>>>>>  #include <linux/pfn.h>
>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>>>  }
>>>>>>  early_param("mem", early_parse_mem);
>>>>>>
>>>>>> +static void __init loongarch_parse_crashkernel(void)
>>>>>> +{
>>>>>> +#ifdef CONFIG_KEXEC
>>>>>> +       unsigned long long start;
>>>>>> +       unsigned long long total_mem;
>>>>>> +       unsigned long long crash_size, crash_base;
>>>>>> +       int ret;
>>>>>> +
>>>>>> +       total_mem = memblock_phys_mem_size();
>>>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>>>>>> +                               &crash_size, &crash_base);
>>>>>> +       if (ret != 0 || crash_size <= 0)
>>>>>> +               return;
>>>>>> +
>>>>>> +
>>>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>>>> +                                       crash_base + crash_size);
>>>>>> +       if (start != crash_base) {
>>>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>>>>>> +               return;
>>>>>> +       }
>>>>>> +
>>>>>> +       crashk_res.start = crash_base;
>>>>>> +       crashk_res.end   = crash_base + crash_size - 1;
>>>>>> +#endif
>>>>>> +}
>>>>>> +
>>>>>> +static void __init request_crashkernel(struct resource *res)
>>>>>> +{
>>>>>> +#ifdef CONFIG_KEXEC
>>>>>> +       int ret;
>>>>>> +
>>>>>> +       if (crashk_res.start == crashk_res.end)
>>>>>> +               return;
>>>>>> +
>>>>>> +       ret = request_resource(res, &crashk_res);
>>>>>> +       if (!ret)
>>>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>>>> +                       (unsigned long)((crashk_res.end -
>>>>>> +                                        crashk_res.start + 1) >> 20),
>>>>>> +                       (unsigned long)(crashk_res.start  >> 20));
>>>>>> +#endif
>>>>>> +}
>>>>>> +
>>>>>>  void __init platform_init(void)
>>>>>>  {
>>>>>>         efi_init();
>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>>>
>>>>>>         check_kernel_sections_mem();
>>>>>>
>>>>>> +       loongarch_parse_crashkernel();
>>>>>> +
>>>>>>         /*
>>>>>>          * In order to reduce the possibility of kernel panic when failed to
>>>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>>>                 request_resource(res, &code_resource);
>>>>>>                 request_resource(res, &data_resource);
>>>>>>                 request_resource(res, &bss_resource);
>>>>>> +               request_crashkernel(res);
>>>>>>         }
>>>>>>  }
>>>>>>
>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>>>> --- a/arch/loongarch/kernel/traps.c
>>>>>> +++ b/arch/loongarch/kernel/traps.c
>>>>>> @@ -10,6 +10,7 @@
>>>>>>  #include <linux/entry-common.h>
>>>>>>  #include <linux/init.h>
>>>>>>  #include <linux/kernel.h>
>>>>>> +#include <linux/kexec.h>
>>>>>>  #include <linux/module.h>
>>>>>>  #include <linux/extable.h>
>>>>>>  #include <linux/mm.h>
>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>>>
>>>>>>         oops_exit();
>>>>>>
>>>>>> +       if (regs && kexec_should_crash(current))
>>>>>> +               crash_kexec(regs);
>>>>>> +
>>>>>>         if (in_interrupt())
>>>>>>                 panic("Fatal exception in interrupt");
>>>>>>
>>>>>> --
>>>>>> 2.36.0
>>>>>>
>>>>
>>
Re: [PATCH 2/3] LoongArch: Add kdump support
Posted by Huacai Chen 3 years, 7 months ago
Hi, Youling,

On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
>
>
> On 09/05/2022 10:14 AM, Huacai Chen wrote:
> > On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >> Hi, Huacai
> >>
> >> On 09/05/2022 09:38 AM, Huacai Chen wrote:
> >>> Hi, Youling,
> >>>
> >>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>
> >>>> Hi, Huacai
> >>>>
> >>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> >>>>> Hi, Youling,
> >>>>>
> >>>>> I think crash.c can be merged into crash_dump.c
> >>>>
> >>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> >>>> I'm not sure if merging crash.c into crash_dump.c will break its
> >>>> consistency?
> >>>>
> >>>> Thanks,
> >>>> Youling
> >>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> >>> can be merged into machine_kexec.c, as arm64 and riscv do.
> >>
> >> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
> >> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
> >> into machine_kexec.c, should crash_shutdown_secondary and
> >> crash_smp_send_stop be placed in smp.c?
> > I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
>
> Ok, I'll merge all into machine_kexec.c.
>
> Youling.
Another problem: 0x9000000091000000 as the default for PHYSICAL_START is too
tricky. If you want to skip the "low memory", maybe we can use
0x9000000090000000 or 0x90000000a0000000 instead?

Huacai
>
> >
> > Huacai
> >>
> >> Youling.
> >>>
> >>> Huacai
> >>>>
> >>>>>
> >>>>> Huacai
> >>>>>
> >>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>>>
> >>>>>> This patch adds support for kdump, the kernel will reserve a region
> >>>>>> for the crash kernel and jump there on panic.
> >>>>>>
> >>>>>> Arch-specific functions are added to allow for implementing a crash
> >>>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
> >>>>>>
> >>>>>> A user space tool, like kexec-tools, is responsible for allocating a
> >>>>>> separate region for the core's ELF header within crash kdump kernel
> >>>>>> memory and filling it in when executing kexec_load().
> >>>>>>
> >>>>>> Then, its location will be advertised to crash dump kernel via a new
> >>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>>>>>
> >>>>>> At the same time, it will also limit the crash kdump kernel to the
> >>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
> >>>>>> so as not to destroy the original kernel dump data.
> >>>>>>
> >>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >>>>>> with copy_oldmem_page().
> >>>>>>
> >>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> >>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test it by
> >>>>>> triggering a crash through /proc/sysrq-trigger:
> >>>>>>
> >>>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>>>>>  # echo c > /proc/sysrq-trigger
> >>>>>>
> >>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >>>>>> ---
> >>>>>>  arch/loongarch/Kconfig                  |  22 ++++++
> >>>>>>  arch/loongarch/Makefile                 |   4 +
> >>>>>>  arch/loongarch/kernel/Makefile          |   3 +-
> >>>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
> >>>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
> >>>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
> >>>>>>  arch/loongarch/kernel/mem.c             |   6 ++
> >>>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
> >>>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
> >>>>>>  arch/loongarch/kernel/traps.c           |   4 +
> >>>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
> >>>>>>  create mode 100644 arch/loongarch/kernel/crash.c
> >>>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
> >>>>>> --- a/arch/loongarch/Kconfig
> >>>>>> +++ b/arch/loongarch/Kconfig
> >>>>>> @@ -420,6 +420,28 @@ config KEXEC
> >>>>>>
> >>>>>>           The name comes from the similarity to the exec system call.
> >>>>>>
> >>>>>> +config CRASH_DUMP
> >>>>>> +       bool "Build kdump crash kernel"
> >>>>>> +       help
> >>>>>> +         Generate crash dump after being started by kexec. This should
> >>>>>> +         be normally only set in special crash dump kernels which are
> >>>>>> +         loaded in the main kernel with kexec-tools into a specially
> >>>>>> +         reserved region and then later executed after a crash by
> >>>>>> +         kdump/kexec.
> >>>>>> +
> >>>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
> >>>>>> +
> >>>>>> +config PHYSICAL_START
> >>>>>> +       hex "Physical address where the kernel is loaded"
> >>>>>> +       default "0x9000000091000000" if 64BIT
> >>>>>> +       depends on CRASH_DUMP
> >>>>>> +       help
> >>>>>> +         This gives the XKPRANGE address where the kernel is loaded.
> >>>>>> +         If you plan to use kernel for capturing the crash dump change
> >>>>>> +         this value to start of the reserved region (the "X" value as
> >>>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
> >>>>>> +         passed to the panic-ed kernel).
> >>>>>> +
> >>>>>>  config SECCOMP
> >>>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
> >>>>>>         depends on PROC_FS
> >>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >>>>>> index 4bc47f47cfd8..7dabd580426d 100644
> >>>>>> --- a/arch/loongarch/Makefile
> >>>>>> +++ b/arch/loongarch/Makefile
> >>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>>>>>  cflags-y += -ffreestanding
> >>>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
> >>>>>>
> >>>>>> +ifdef CONFIG_PHYSICAL_START
> >>>>>> +load-y         = $(CONFIG_PHYSICAL_START)
> >>>>>> +else
> >>>>>>  load-y         = 0x9000000000200000
> >>>>>> +endif
> >>>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>>>>>
> >>>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
> >>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >>>>>> index 20b64ac3f128..df5aea129364 100644
> >>>>>> --- a/arch/loongarch/kernel/Makefile
> >>>>>> +++ b/arch/loongarch/kernel/Makefile
> >>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
> >>>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
> >>>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
> >>>>>>
> >>>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> >>>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
> >>>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
> >>>>>>
> >>>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >>>>>> new file mode 100644
> >>>>>> index 000000000000..b4f249ec6301
> >>>>>> --- /dev/null
> >>>>>> +++ b/arch/loongarch/kernel/crash.c
> >>>>>> @@ -0,0 +1,100 @@
> >>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>> +/*
> >>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >>>>>> + *
> >>>>>> + * Derived from MIPS
> >>>>>> + */
> >>>>>> +#include <linux/kernel.h>
> >>>>>> +#include <linux/smp.h>
> >>>>>> +#include <linux/reboot.h>
> >>>>>> +#include <linux/crash_dump.h>
> >>>>>> +#include <linux/delay.h>
> >>>>>> +#include <linux/irq.h>
> >>>>>> +#include <linux/types.h>
> >>>>>> +#include <linux/sched.h>
> >>>>>> +#include <linux/sched/task_stack.h>
> >>>>>> +#include <asm/cacheflush.h>
> >>>>>> +#include <asm/kexec.h>
> >>>>>> +
> >>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >>>>>> +
> >>>>>> +#ifdef CONFIG_SMP
> >>>>>> +static void crash_shutdown_secondary(void *passed_regs)
> >>>>>> +{
> >>>>>> +       struct pt_regs *regs = passed_regs;
> >>>>>> +       int cpu = smp_processor_id();
> >>>>>> +
> >>>>>> +       /*
> >>>>>> +        * If we are passed registers, use those.  Otherwise get the
> >>>>>> +        * regs from the last interrupt, which should be correct, as
> >>>>>> +        * we are in an interrupt.  But if the regs are not there,
> >>>>>> +        * pull them from the top of the stack.  They are probably
> >>>>>> +        * wrong, but we need something to keep from crashing again.
> >>>>>> +        */
> >>>>>> +       if (!regs)
> >>>>>> +               regs = get_irq_regs();
> >>>>>> +       if (!regs)
> >>>>>> +               regs = task_pt_regs(current);
> >>>>>> +
> >>>>>> +       local_irq_disable();
> >>>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >>>>>> +               crash_save_cpu(regs, cpu);
> >>>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
> >>>>>> +
> >>>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
> >>>>>> +               cpu_relax();
> >>>>>> +
> >>>>>> +       kexec_reboot();
> >>>>>> +}
> >>>>>> +
> >>>>>> +/* Override the weak function in kernel/panic.c */
> >>>>>> +void crash_smp_send_stop(void)
> >>>>>> +{
> >>>>>> +       static int cpus_stopped;
> >>>>>> +       unsigned long timeout;
> >>>>>> +       unsigned int ncpus;
> >>>>>> +
> >>>>>> +       /*
> >>>>>> +        * This function can be called twice in panic path, but obviously
> >>>>>> +        * we execute this only once.
> >>>>>> +        */
> >>>>>> +       if (cpus_stopped)
> >>>>>> +               return;
> >>>>>> +
> >>>>>> +       cpus_stopped = 1;
> >>>>>> +
> >>>>>> +        /* Excluding the panic cpu */
> >>>>>> +       ncpus = num_online_cpus() - 1;
> >>>>>> +
> >>>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
> >>>>>> +       smp_wmb();
> >>>>>> +
> >>>>>> +       /*
> >>>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
> >>>>>> +        * respond. Delay of at least 10 seconds.
> >>>>>> +        */
> >>>>>> +       pr_emerg("Sending IPI to other cpus...\n");
> >>>>>> +       timeout = USEC_PER_SEC * 10;
> >>>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >>>>>> +               cpu_relax();
> >>>>>> +               udelay(1);
> >>>>>> +       }
> >>>>>> +}
> >>>>>> +
> >>>>>> +#endif
> >>>>>> +
> >>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>> +{
> >>>>>> +       int crashing_cpu;
> >>>>>> +
> >>>>>> +       local_irq_disable();
> >>>>>> +
> >>>>>> +       crashing_cpu = smp_processor_id();
> >>>>>> +       crash_save_cpu(regs, crashing_cpu);
> >>>>>> +
> >>>>>> +       /* shutdown non-crashing cpus */
> >>>>>> +       crash_smp_send_stop();
> >>>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >>>>>> +
> >>>>>> +       pr_info("Starting crashdump kernel...\n");
> >>>>>> +}
> >>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >>>>>> new file mode 100644
> >>>>>> index 000000000000..13e5d2f7870d
> >>>>>> --- /dev/null
> >>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
> >>>>>> @@ -0,0 +1,19 @@
> >>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>> +#include <linux/highmem.h>
> >>>>>> +#include <linux/crash_dump.h>
> >>>>>> +#include <linux/io.h>
> >>>>>> +
> >>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >>>>>> +                        size_t csize, unsigned long offset)
> >>>>>> +{
> >>>>>> +       void  *vaddr;
> >>>>>> +
> >>>>>> +       if (!csize)
> >>>>>> +               return 0;
> >>>>>> +
> >>>>>> +       vaddr = kmap_local_pfn(pfn);
> >>>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
> >>>>>> +       kunmap_local(vaddr);
> >>>>>> +
> >>>>>> +       return csize;
> >>>>>> +}
> >>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
> >>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
> >>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>>>>>                 continue;
> >>>>>>         }
> >>>>>>
> >>>>>> -       /* kexec need a safe page to save reboot_code_buffer */
> >>>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>>>>>
> >>>>>>         reboot_code_buffer =
> >>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>>>>>
> >>>>>>         kexec_reboot();
> >>>>>>  }
> >>>>>> -
> >>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>> -{
> >>>>>> -}
> >>>>>>  #endif
> >>>>>>
> >>>>>>  void machine_shutdown(void)
> >>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>
> >>>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
> >>>>>>
> >>>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
> >>>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>>
> >>>>>>         /*
> >>>>>>          * The generic kexec code builds a page list with physical
> >>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>
> >>>>>>         /*
> >>>>>>          * We know we were online, and there will be no incoming IPIs at
> >>>>>> -        * this point.
> >>>>>> +        * this point. Mark online again before rebooting so that the crash
> >>>>>> +        * analysis tool will see us correctly.
> >>>>>>          */
> >>>>>>         set_cpu_online(smp_processor_id(), true);
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >>>>>> index 7423361b0ebc..c6def6ff81c8 100644
> >>>>>> --- a/arch/loongarch/kernel/mem.c
> >>>>>> +++ b/arch/loongarch/kernel/mem.c
> >>>>>> @@ -5,6 +5,7 @@
> >>>>>>  #include <linux/efi.h>
> >>>>>>  #include <linux/initrd.h>
> >>>>>>  #include <linux/memblock.h>
> >>>>>> +#include <linux/of_fdt.h>
> >>>>>>
> >>>>>>  #include <asm/bootinfo.h>
> >>>>>>  #include <asm/loongson.h>
> >>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>>>>>
> >>>>>>         /* Reserve the initrd */
> >>>>>>         reserve_initrd_mem();
> >>>>>> +
> >>>>>> +       /* Mainly reserved memory for the elf core head */
> >>>>>> +       early_init_fdt_scan_reserved_mem();
> >>>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
> >>>>>> +       early_init_dt_check_for_usable_mem_range();
> >>>>>>  }
> >>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
> >>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>>>>>         move            s2, a2
> >>>>>>         move            s3, a3
> >>>>>>
> >>>>>> +       /*
> >>>>>> +        * In case of a kdump/crash kernel, the indirection page is not
> >>>>>> +        * populated as the kernel is directly copied to a reserved location
> >>>>>> +        */
> >>>>>> +       beqz            s2, done
> >>>>>> +
> >>>>>>  process_entry:
> >>>>>>         PTR_L           s4, s2, 0
> >>>>>>         PTR_ADDI        s2, s2, SZREG
> >>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >>>>>> index f938aae3e92c..ea34b77e402f 100644
> >>>>>> --- a/arch/loongarch/kernel/setup.c
> >>>>>> +++ b/arch/loongarch/kernel/setup.c
> >>>>>> @@ -19,6 +19,8 @@
> >>>>>>  #include <linux/memblock.h>
> >>>>>>  #include <linux/initrd.h>
> >>>>>>  #include <linux/ioport.h>
> >>>>>> +#include <linux/kexec.h>
> >>>>>> +#include <linux/crash_dump.h>
> >>>>>>  #include <linux/root_dev.h>
> >>>>>>  #include <linux/console.h>
> >>>>>>  #include <linux/pfn.h>
> >>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>>>>>  }
> >>>>>>  early_param("mem", early_parse_mem);
> >>>>>>
> >>>>>> +static void __init loongarch_parse_crashkernel(void)
> >>>>>> +{
> >>>>>> +#ifdef CONFIG_KEXEC
> >>>>>> +       unsigned long long start;
> >>>>>> +       unsigned long long total_mem;
> >>>>>> +       unsigned long long crash_size, crash_base;
> >>>>>> +       int ret;
> >>>>>> +
> >>>>>> +       total_mem = memblock_phys_mem_size();
> >>>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
> >>>>>> +                               &crash_size, &crash_base);
> >>>>>> +       if (ret != 0 || crash_size <= 0)
> >>>>>> +               return;
> >>>>>> +
> >>>>>> +
> >>>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >>>>>> +                                       crash_base + crash_size);
> >>>>>> +       if (start != crash_base) {
> >>>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
> >>>>>> +               return;
> >>>>>> +       }
> >>>>>> +
> >>>>>> +       crashk_res.start = crash_base;
> >>>>>> +       crashk_res.end   = crash_base + crash_size - 1;
> >>>>>> +#endif
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void __init request_crashkernel(struct resource *res)
> >>>>>> +{
> >>>>>> +#ifdef CONFIG_KEXEC
> >>>>>> +       int ret;
> >>>>>> +
> >>>>>> +       if (crashk_res.start == crashk_res.end)
> >>>>>> +               return;
> >>>>>> +
> >>>>>> +       ret = request_resource(res, &crashk_res);
> >>>>>> +       if (!ret)
> >>>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >>>>>> +                       (unsigned long)((crashk_res.end -
> >>>>>> +                                        crashk_res.start + 1) >> 20),
> >>>>>> +                       (unsigned long)(crashk_res.start  >> 20));
> >>>>>> +#endif
> >>>>>> +}
> >>>>>> +
> >>>>>>  void __init platform_init(void)
> >>>>>>  {
> >>>>>>         efi_init();
> >>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>>>>>
> >>>>>>         check_kernel_sections_mem();
> >>>>>>
> >>>>>> +       loongarch_parse_crashkernel();
> >>>>>> +
> >>>>>>         /*
> >>>>>>          * In order to reduce the possibility of kernel panic when failed to
> >>>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>>>>>                 request_resource(res, &code_resource);
> >>>>>>                 request_resource(res, &data_resource);
> >>>>>>                 request_resource(res, &bss_resource);
> >>>>>> +               request_crashkernel(res);
> >>>>>>         }
> >>>>>>  }
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >>>>>> index aa1c95aaf595..0e610872f3f4 100644
> >>>>>> --- a/arch/loongarch/kernel/traps.c
> >>>>>> +++ b/arch/loongarch/kernel/traps.c
> >>>>>> @@ -10,6 +10,7 @@
> >>>>>>  #include <linux/entry-common.h>
> >>>>>>  #include <linux/init.h>
> >>>>>>  #include <linux/kernel.h>
> >>>>>> +#include <linux/kexec.h>
> >>>>>>  #include <linux/module.h>
> >>>>>>  #include <linux/extable.h>
> >>>>>>  #include <linux/mm.h>
> >>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>>>>>
> >>>>>>         oops_exit();
> >>>>>>
> >>>>>> +       if (regs && kexec_should_crash(current))
> >>>>>> +               crash_kexec(regs);
> >>>>>> +
> >>>>>>         if (in_interrupt())
> >>>>>>                 panic("Fatal exception in interrupt");
> >>>>>>
> >>>>>> --
> >>>>>> 2.36.0
> >>>>>>
> >>>>
> >>
>
Re: [PATCH 2/3] LoongArch: Add kdump support
Posted by Youling Tang 3 years, 7 months ago
Hi, Huacai

On 09/05/2022 03:32 PM, Huacai Chen wrote:
> Hi, Youling,
>
> On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>>
>>
>> On 09/05/2022 10:14 AM, Huacai Chen wrote:
>>> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>> Hi, Huacai
>>>>
>>>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
>>>>> Hi, Youling,
>>>>>
>>>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>
>>>>>> Hi, Huacai
>>>>>>
>>>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>>>>>> Hi, Youling,
>>>>>>>
>>>>>>> I think crash.c can be merged into crash_dump.c
>>>>>>
>>>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>>>>>> I'm not sure if merging crash.c into crash_dump.c will break its
>>>>>> consistency?
>>>>>>
>>>>>> Thanks,
>>>>>> Youling
>>>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
>>>>> can be merged into machine_kexec.c, as arm64 and riscv do.
>>>>
>>>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
>>>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
>>>> into machine_kexec.c, should crash_shutdown_secondary and
>>>> crash_smp_send_stop be placed in smp.c?
>>> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
>>
>> Ok, I'll merge all into machine_kexec.c.
>>
>> Youling.
> Another problem, 0x9000000091000000 for PHYSICAL_START is too tricky.
> If you want to skip the "low memory", maybe we can use
> 0x9000000090000000 or 0x90000000a0000000?

Because there are many holes in our memory layout, if PHYSICAL_START is
set to 0x90000000a0000000, the largest area that can be reserved for the
crashkernel will be 512M; reserving anything beyond that will fail.

# cat /proc/iomem
90400000-bfffffff : System RAM
c0020000-f9efffff : System RAM
   f6810000-f6813fff : Reserved

The second System RAM region starts at 0x90400000, so 0x9000000090000000
is too low: it lies below the start of that region.

Youling.

>
> Huacai
>>
>>>
>>> Huacai
>>>>
>>>> Youling.
>>>>>
>>>>> Huacai
>>>>>>
>>>>>>>
>>>>>>> Huacai
>>>>>>>
>>>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>>>
>>>>>>>> This patch adds support for kdump, the kernel will reserve a region
>>>>>>>> for the crash kernel and jump there on panic.
>>>>>>>>
>>>>>>>> Arch-specific functions are added to allow for implementing a crash
>>>>>>>> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
>>>>>>>>
>>>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>>>>>> separate region for the core's ELF header within crash kdump kernel
>>>>>>>> memory and filling it in when executing kexec_load().
>>>>>>>>
>>>>>>>> Then, its location will be advertised to crash dump kernel via a new
>>>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>>>>>
>>>>>>>> At the same time, it will also limit the crash kdump kernel to the
>>>>>>>> crashkernel area via a new device-tree property, "linux, usable-memory-range",
>>>>>>>> so as not to destroy the original kernel dump data.
>>>>>>>>
>>>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>>>>>> with copy_oldmem_page().
>>>>>>>>
>>>>>>>> I tested this on  LoongArch 3A5000 machine and works as expected (Suggest
>>>>>>>> crashkernel parameter is "crashkernel=512M@2320M"), you may test it by
>>>>>>>> triggering a crash through /proc/sysrq_trigger:
>>>>>>>>
>>>>>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>>>>>  # echo c > /proc/sysrq_trigger
>>>>>>>>
>>>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>>>>>> ---
>>>>>>>>  arch/loongarch/Kconfig                  |  22 ++++++
>>>>>>>>  arch/loongarch/Makefile                 |   4 +
>>>>>>>>  arch/loongarch/kernel/Makefile          |   3 +-
>>>>>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>>>>>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>>>>>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>>>>>>>>  arch/loongarch/kernel/mem.c             |   6 ++
>>>>>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>>>>>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>>>>>>>>  arch/loongarch/kernel/traps.c           |   4 +
>>>>>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
>>>>>>>>  create mode 100644 arch/loongarch/kernel/crash.c
>>>>>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>>>>>> --- a/arch/loongarch/Kconfig
>>>>>>>> +++ b/arch/loongarch/Kconfig
>>>>>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>>>>>
>>>>>>>>           The name comes from the similarity to the exec system call.
>>>>>>>>
>>>>>>>> +config CRASH_DUMP
>>>>>>>> +       bool "Build kdump crash kernel"
>>>>>>>> +       help
>>>>>>>> +         Generate crash dump after being started by kexec. This should
>>>>>>>> +         be normally only set in special crash dump kernels which are
>>>>>>>> +         loaded in the main kernel with kexec-tools into a specially
>>>>>>>> +         reserved region and then later executed after a crash by
>>>>>>>> +         kdump/kexec.
>>>>>>>> +
>>>>>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>>>>>>>> +
>>>>>>>> +config PHYSICAL_START
>>>>>>>> +       hex "Physical address where the kernel is loaded"
>>>>>>>> +       default "0x9000000091000000" if 64BIT
>>>>>>>> +       depends on CRASH_DUMP
>>>>>>>> +       help
>>>>>>>> +         This gives the XKPRANGE address where the kernel is loaded.
>>>>>>>> +         If you plan to use kernel for capturing the crash dump change
>>>>>>>> +         this value to start of the reserved region (the "X" value as
>>>>>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
>>>>>>>> +         passed to the panic-ed kernel).
>>>>>>>> +
>>>>>>>>  config SECCOMP
>>>>>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>>>>>>>         depends on PROC_FS
>>>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>>>>>> --- a/arch/loongarch/Makefile
>>>>>>>> +++ b/arch/loongarch/Makefile
>>>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>>>>>  cflags-y += -ffreestanding
>>>>>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>>>>>
>>>>>>>> +ifdef CONFIG_PHYSICAL_START
>>>>>>>> +load-y         = $(CONFIG_PHYSICAL_START)
>>>>>>>> +else
>>>>>>>>  load-y         = 0x9000000000200000
>>>>>>>> +endif
>>>>>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>>>>>
>>>>>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>>>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>>>>>> index 20b64ac3f128..df5aea129364 100644
>>>>>>>> --- a/arch/loongarch/kernel/Makefile
>>>>>>>> +++ b/arch/loongarch/kernel/Makefile
>>>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>>>>>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>>>>>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>>>>>>>
>>>>>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>>>>>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
>>>>>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>>>>>>>
>>>>>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..b4f249ec6301
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/arch/loongarch/kernel/crash.c
>>>>>>>> @@ -0,0 +1,100 @@
>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>> +/*
>>>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>>>>>> + *
>>>>>>>> + * Derived from MIPS
>>>>>>>> + */
>>>>>>>> +#include <linux/kernel.h>
>>>>>>>> +#include <linux/smp.h>
>>>>>>>> +#include <linux/reboot.h>
>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>> +#include <linux/delay.h>
>>>>>>>> +#include <linux/irq.h>
>>>>>>>> +#include <linux/types.h>
>>>>>>>> +#include <linux/sched.h>
>>>>>>>> +#include <linux/sched/task_stack.h>
>>>>>>>> +#include <asm/cacheflush.h>
>>>>>>>> +#include <asm/kexec.h>
>>>>>>>> +
>>>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>>>>>> +
>>>>>>>> +#ifdef CONFIG_SMP
>>>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>>>>>> +{
>>>>>>>> +       struct pt_regs *regs = passed_regs;
>>>>>>>> +       int cpu = smp_processor_id();
>>>>>>>> +
>>>>>>>> +       /*
>>>>>>>> +        * If we are passed registers, use those.  Otherwise get the
>>>>>>>> +        * regs from the last interrupt, which should be correct, as
>>>>>>>> +        * we are in an interrupt.  But if the regs are not there,
>>>>>>>> +        * pull them from the top of the stack.  They are probably
>>>>>>>> +        * wrong, but we need something to keep from crashing again.
>>>>>>>> +        */
>>>>>>>> +       if (!regs)
>>>>>>>> +               regs = get_irq_regs();
>>>>>>>> +       if (!regs)
>>>>>>>> +               regs = task_pt_regs(current);
>>>>>>>> +
>>>>>>>> +       local_irq_disable();
>>>>>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>>>>>> +               crash_save_cpu(regs, cpu);
>>>>>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>>>>>>>> +
>>>>>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
>>>>>>>> +               cpu_relax();
>>>>>>>> +
>>>>>>>> +       kexec_reboot();
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/* Override the weak function in kernel/panic.c */
>>>>>>>> +void crash_smp_send_stop(void)
>>>>>>>> +{
>>>>>>>> +       static int cpus_stopped;
>>>>>>>> +       unsigned long timeout;
>>>>>>>> +       unsigned int ncpus;
>>>>>>>> +
>>>>>>>> +       /*
>>>>>>>> +        * This function can be called twice in panic path, but obviously
>>>>>>>> +        * we execute this only once.
>>>>>>>> +        */
>>>>>>>> +       if (cpus_stopped)
>>>>>>>> +               return;
>>>>>>>> +
>>>>>>>> +       cpus_stopped = 1;
>>>>>>>> +
>>>>>>>> +        /* Excluding the panic cpu */
>>>>>>>> +       ncpus = num_online_cpus() - 1;
>>>>>>>> +
>>>>>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>>>>>> +       smp_wmb();
>>>>>>>> +
>>>>>>>> +       /*
>>>>>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
>>>>>>>> +        * respond. Delay of at least 10 seconds.
>>>>>>>> +        */
>>>>>>>> +       pr_emerg("Sending IPI to other cpus...\n");
>>>>>>>> +       timeout = USEC_PER_SEC * 10;
>>>>>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>>>>>> +               cpu_relax();
>>>>>>>> +               udelay(1);
>>>>>>>> +       }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>> +{
>>>>>>>> +       int crashing_cpu;
>>>>>>>> +
>>>>>>>> +       local_irq_disable();
>>>>>>>> +
>>>>>>>> +       crashing_cpu = smp_processor_id();
>>>>>>>> +       crash_save_cpu(regs, crashing_cpu);
>>>>>>>> +
>>>>>>>> +       /* shutdown non-crashing cpus */
>>>>>>>> +       crash_smp_send_stop();
>>>>>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>>>>>> +
>>>>>>>> +       pr_info("Starting crashdump kernel...\n");
>>>>>>>> +}
>>>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..13e5d2f7870d
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>>>>>> @@ -0,0 +1,19 @@
>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>> +#include <linux/highmem.h>
>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>> +#include <linux/io.h>
>>>>>>>> +
>>>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>>>>>> +                        size_t csize, unsigned long offset)
>>>>>>>> +{
>>>>>>>> +       void  *vaddr;
>>>>>>>> +
>>>>>>>> +       if (!csize)
>>>>>>>> +               return 0;
>>>>>>>> +
>>>>>>>> +       vaddr = kmap_local_pfn(pfn);
>>>>>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>>>>>>>> +       kunmap_local(vaddr);
>>>>>>>> +
>>>>>>>> +       return csize;
>>>>>>>> +}
>>>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>>>>>                 continue;
>>>>>>>>         }
>>>>>>>>
>>>>>>>> -       /* kexec need a safe page to save reboot_code_buffer */
>>>>>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>>>>>
>>>>>>>>         reboot_code_buffer =
>>>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>>>>>
>>>>>>>>         kexec_reboot();
>>>>>>>>  }
>>>>>>>> -
>>>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>> -{
>>>>>>>> -}
>>>>>>>>  #endif
>>>>>>>>
>>>>>>>>  void machine_shutdown(void)
>>>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>
>>>>>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>>>>>
>>>>>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>>>>>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>>
>>>>>>>>         /*
>>>>>>>>          * The generic kexec code builds a page list with physical
>>>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>
>>>>>>>>         /*
>>>>>>>>          * We know we were online, and there will be no incoming IPIs at
>>>>>>>> -        * this point.
>>>>>>>> +        * this point. Mark online again before rebooting so that the crash
>>>>>>>> +        * analysis tool will see us correctly.
>>>>>>>>          */
>>>>>>>>         set_cpu_online(smp_processor_id(), true);
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>>>>>> --- a/arch/loongarch/kernel/mem.c
>>>>>>>> +++ b/arch/loongarch/kernel/mem.c
>>>>>>>> @@ -5,6 +5,7 @@
>>>>>>>>  #include <linux/efi.h>
>>>>>>>>  #include <linux/initrd.h>
>>>>>>>>  #include <linux/memblock.h>
>>>>>>>> +#include <linux/of_fdt.h>
>>>>>>>>
>>>>>>>>  #include <asm/bootinfo.h>
>>>>>>>>  #include <asm/loongson.h>
>>>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>>>>>
>>>>>>>>         /* Reserve the initrd */
>>>>>>>>         reserve_initrd_mem();
>>>>>>>> +
>>>>>>>> +       /* Mainly reserved memory for the elf core head */
>>>>>>>> +       early_init_fdt_scan_reserved_mem();
>>>>>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
>>>>>>>> +       early_init_dt_check_for_usable_mem_range();
>>>>>>>>  }
>>>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>>>>>         move            s2, a2
>>>>>>>>         move            s3, a3
>>>>>>>>
>>>>>>>> +       /*
>>>>>>>> +        * In case of a kdump/crash kernel, the indirection page is not
>>>>>>>> +        * populated as the kernel is directly copied to a reserved location
>>>>>>>> +        */
>>>>>>>> +       beqz            s2, done
>>>>>>>> +
>>>>>>>>  process_entry:
>>>>>>>>         PTR_L           s4, s2, 0
>>>>>>>>         PTR_ADDI        s2, s2, SZREG
>>>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>>>>>> index f938aae3e92c..ea34b77e402f 100644
>>>>>>>> --- a/arch/loongarch/kernel/setup.c
>>>>>>>> +++ b/arch/loongarch/kernel/setup.c
>>>>>>>> @@ -19,6 +19,8 @@
>>>>>>>>  #include <linux/memblock.h>
>>>>>>>>  #include <linux/initrd.h>
>>>>>>>>  #include <linux/ioport.h>
>>>>>>>> +#include <linux/kexec.h>
>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>  #include <linux/root_dev.h>
>>>>>>>>  #include <linux/console.h>
>>>>>>>>  #include <linux/pfn.h>
>>>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>>>>>  }
>>>>>>>>  early_param("mem", early_parse_mem);
>>>>>>>>
>>>>>>>> +static void __init loongarch_parse_crashkernel(void)
>>>>>>>> +{
>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>> +       unsigned long long start;
>>>>>>>> +       unsigned long long total_mem;
>>>>>>>> +       unsigned long long crash_size, crash_base;
>>>>>>>> +       int ret;
>>>>>>>> +
>>>>>>>> +       total_mem = memblock_phys_mem_size();
>>>>>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>>>>>>>> +                               &crash_size, &crash_base);
>>>>>>>> +       if (ret != 0 || crash_size <= 0)
>>>>>>>> +               return;
>>>>>>>> +
>>>>>>>> +
>>>>>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>>>>>> +                                       crash_base + crash_size);
>>>>>>>> +       if (start != crash_base) {
>>>>>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>>>>>>>> +               return;
>>>>>>>> +       }
>>>>>>>> +
>>>>>>>> +       crashk_res.start = crash_base;
>>>>>>>> +       crashk_res.end   = crash_base + crash_size - 1;
>>>>>>>> +#endif
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void __init request_crashkernel(struct resource *res)
>>>>>>>> +{
>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>> +       int ret;
>>>>>>>> +
>>>>>>>> +       if (crashk_res.start == crashk_res.end)
>>>>>>>> +               return;
>>>>>>>> +
>>>>>>>> +       ret = request_resource(res, &crashk_res);
>>>>>>>> +       if (!ret)
>>>>>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>>>>>> +                       (unsigned long)((crashk_res.end -
>>>>>>>> +                                        crashk_res.start + 1) >> 20),
>>>>>>>> +                       (unsigned long)(crashk_res.start  >> 20));
>>>>>>>> +#endif
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  void __init platform_init(void)
>>>>>>>>  {
>>>>>>>>         efi_init();
>>>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>>>>>
>>>>>>>>         check_kernel_sections_mem();
>>>>>>>>
>>>>>>>> +       loongarch_parse_crashkernel();
>>>>>>>> +
>>>>>>>>         /*
>>>>>>>>          * In order to reduce the possibility of kernel panic when failed to
>>>>>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>>>>>                 request_resource(res, &code_resource);
>>>>>>>>                 request_resource(res, &data_resource);
>>>>>>>>                 request_resource(res, &bss_resource);
>>>>>>>> +               request_crashkernel(res);
>>>>>>>>         }
>>>>>>>>  }
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>>>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>>>>>> --- a/arch/loongarch/kernel/traps.c
>>>>>>>> +++ b/arch/loongarch/kernel/traps.c
>>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>>  #include <linux/entry-common.h>
>>>>>>>>  #include <linux/init.h>
>>>>>>>>  #include <linux/kernel.h>
>>>>>>>> +#include <linux/kexec.h>
>>>>>>>>  #include <linux/module.h>
>>>>>>>>  #include <linux/extable.h>
>>>>>>>>  #include <linux/mm.h>
>>>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>>>>>
>>>>>>>>         oops_exit();
>>>>>>>>
>>>>>>>> +       if (regs && kexec_should_crash(current))
>>>>>>>> +               crash_kexec(regs);
>>>>>>>> +
>>>>>>>>         if (in_interrupt())
>>>>>>>>                 panic("Fatal exception in interrupt");
>>>>>>>>
>>>>>>>> --
>>>>>>>> 2.36.0
>>>>>>>>
>>>>>>
>>>>
>>

Re: [PATCH 2/3] LoongArch: Add kdump support
Posted by Huacai Chen 3 years, 7 months ago
Hi, Youling,

On Mon, Sep 5, 2022 at 3:45 PM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Huacai
>
> On 09/05/2022 03:32 PM, Huacai Chen wrote:
> > Hi, Youling,
> >
> > On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >>
> >>
> >> On 09/05/2022 10:14 AM, Huacai Chen wrote:
> >>> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>
> >>>> Hi, Huacai
> >>>>
> >>>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
> >>>>> Hi, Youling,
> >>>>>
> >>>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>>>
> >>>>>> Hi, Huacai
> >>>>>>
> >>>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> >>>>>>> Hi, Youling,
> >>>>>>>
> >>>>>>> I think crash.c can be merged into crash_dump.c
> >>>>>>
> >>>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> >>>>>> I'm not sure if merging crash.c into crash_dump.c will break its
> >>>>>> consistency?
> >>>>>>
> >>>>>> Thanks,
> >>>>>> Youling
> >>>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> >>>>> can be merged into machine_kexec.c, as arm64 and riscv do.
> >>>>
> >>>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
> >>>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
> >>>> into machine_kexec.c, should crash_shutdown_secondary and
> >>>> crash_smp_send_stop be placed in smp.c?
> >>> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
> >>
> >> Ok, I'll merge all into machine_kexec.c.
> >>
> >> Youling.
> > Another problem, 0x9000000091000000 for PHYSICAL_START is too tricky.
> > If you want to skip the "low memory", maybe we can use
> > 0x9000000090000000 or 0x90000000a0000000?
>
> Because there are many holes in our memory layout, if PHYSICAL_START is
> set to 0x90000000a0000000, the largest reserved area of the crashkernel
> will be 512M, beyond which it will fail.
Then 0x9000000090000000 is not suitable, but I think 512M is enough?
If so, let's use 0x90000000a0000000.

Huacai
>
> # cat /proc/iomem
> 90400000-bfffffff : System RAM
> c0020000-f9efffff : System RAM
>    f6810000-f6813fff : Reserved
>
> The second System RAM starts at 0x90400000, so 0x9000000090000000 will
> be too small.
>
> Youling.
>
> >
> > Huacai
> >>
> >>>
> >>> Huacai
> >>>>
> >>>> Youling.
> >>>>>
> >>>>> Huacai
> >>>>>>
> >>>>>>>
> >>>>>>> Huacai
> >>>>>>>
> >>>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>>>>>
> >>>>>>>> This patch adds support for kdump, the kernel will reserve a region
> >>>>>>>> for the crash kernel and jump there on panic.
> >>>>>>>>
> >>>>>>>> Arch-specific functions are added to allow for implementing a crash
> >>>>>>>> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
> >>>>>>>>
> >>>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
> >>>>>>>> separate region for the core's ELF header within crash kdump kernel
> >>>>>>>> memory and filling it in when executing kexec_load().
> >>>>>>>>
> >>>>>>>> Then, its location will be advertised to crash dump kernel via a new
> >>>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >>>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>>>>>>>
> >>>>>>>> At the same time, it will also limit the crash kdump kernel to the
> >>>>>>>> crashkernel area via a new device-tree property, "linux, usable-memory-range",
> >>>>>>>> so as not to destroy the original kernel dump data.
> >>>>>>>>
> >>>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >>>>>>>> with copy_oldmem_page().
> >>>>>>>>
> >>>>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> >>>>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
> >>>>>>>> it by triggering a crash through /proc/sysrq-trigger:
> >>>>>>>>
> >>>>>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>>>>>>>  # echo c > /proc/sysrq-trigger
> >>>>>>>>
> >>>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >>>>>>>> ---
> >>>>>>>>  arch/loongarch/Kconfig                  |  22 ++++++
> >>>>>>>>  arch/loongarch/Makefile                 |   4 +
> >>>>>>>>  arch/loongarch/kernel/Makefile          |   3 +-
> >>>>>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
> >>>>>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
> >>>>>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
> >>>>>>>>  arch/loongarch/kernel/mem.c             |   6 ++
> >>>>>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
> >>>>>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
> >>>>>>>>  arch/loongarch/kernel/traps.c           |   4 +
> >>>>>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
> >>>>>>>>  create mode 100644 arch/loongarch/kernel/crash.c
> >>>>>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >>>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
> >>>>>>>> --- a/arch/loongarch/Kconfig
> >>>>>>>> +++ b/arch/loongarch/Kconfig
> >>>>>>>> @@ -420,6 +420,28 @@ config KEXEC
> >>>>>>>>
> >>>>>>>>           The name comes from the similarity to the exec system call.
> >>>>>>>>
> >>>>>>>> +config CRASH_DUMP
> >>>>>>>> +       bool "Build kdump crash kernel"
> >>>>>>>> +       help
> >>>>>>>> +         Generate crash dump after being started by kexec. This should
> >>>>>>>> +         be normally only set in special crash dump kernels which are
> >>>>>>>> +         loaded in the main kernel with kexec-tools into a specially
> >>>>>>>> +         reserved region and then later executed after a crash by
> >>>>>>>> +         kdump/kexec.
> >>>>>>>> +
> >>>>>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
> >>>>>>>> +
> >>>>>>>> +config PHYSICAL_START
> >>>>>>>> +       hex "Physical address where the kernel is loaded"
> >>>>>>>> +       default "0x9000000091000000" if 64BIT
> >>>>>>>> +       depends on CRASH_DUMP
> >>>>>>>> +       help
> >>>>>>>> +         This gives the XKPRANGE address where the kernel is loaded.
> >>>>>>>> +         If you plan to use kernel for capturing the crash dump change
> >>>>>>>> +         this value to start of the reserved region (the "X" value as
> >>>>>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
> >>>>>>>> +         passed to the panic-ed kernel).
> >>>>>>>> +
> >>>>>>>>  config SECCOMP
> >>>>>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
> >>>>>>>>         depends on PROC_FS
> >>>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >>>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
> >>>>>>>> --- a/arch/loongarch/Makefile
> >>>>>>>> +++ b/arch/loongarch/Makefile
> >>>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>>>>>>>  cflags-y += -ffreestanding
> >>>>>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
> >>>>>>>>
> >>>>>>>> +ifdef CONFIG_PHYSICAL_START
> >>>>>>>> +load-y         = $(CONFIG_PHYSICAL_START)
> >>>>>>>> +else
> >>>>>>>>  load-y         = 0x9000000000200000
> >>>>>>>> +endif
> >>>>>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>>>>>>>
> >>>>>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
> >>>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >>>>>>>> index 20b64ac3f128..df5aea129364 100644
> >>>>>>>> --- a/arch/loongarch/kernel/Makefile
> >>>>>>>> +++ b/arch/loongarch/kernel/Makefile
> >>>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
> >>>>>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
> >>>>>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
> >>>>>>>>
> >>>>>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
> >>>>>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
> >>>>>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
> >>>>>>>>
> >>>>>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >>>>>>>> new file mode 100644
> >>>>>>>> index 000000000000..b4f249ec6301
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/arch/loongarch/kernel/crash.c
> >>>>>>>> @@ -0,0 +1,100 @@
> >>>>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>>>> +/*
> >>>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >>>>>>>> + *
> >>>>>>>> + * Derived from MIPS
> >>>>>>>> + */
> >>>>>>>> +#include <linux/kernel.h>
> >>>>>>>> +#include <linux/smp.h>
> >>>>>>>> +#include <linux/reboot.h>
> >>>>>>>> +#include <linux/crash_dump.h>
> >>>>>>>> +#include <linux/delay.h>
> >>>>>>>> +#include <linux/irq.h>
> >>>>>>>> +#include <linux/types.h>
> >>>>>>>> +#include <linux/sched.h>
> >>>>>>>> +#include <linux/sched/task_stack.h>
> >>>>>>>> +#include <asm/cacheflush.h>
> >>>>>>>> +#include <asm/kexec.h>
> >>>>>>>> +
> >>>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >>>>>>>> +
> >>>>>>>> +#ifdef CONFIG_SMP
> >>>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
> >>>>>>>> +{
> >>>>>>>> +       struct pt_regs *regs = passed_regs;
> >>>>>>>> +       int cpu = smp_processor_id();
> >>>>>>>> +
> >>>>>>>> +       /*
> >>>>>>>> +        * If we are passed registers, use those.  Otherwise get the
> >>>>>>>> +        * regs from the last interrupt, which should be correct, as
> >>>>>>>> +        * we are in an interrupt.  But if the regs are not there,
> >>>>>>>> +        * pull them from the top of the stack.  They are probably
> >>>>>>>> +        * wrong, but we need something to keep from crashing again.
> >>>>>>>> +        */
> >>>>>>>> +       if (!regs)
> >>>>>>>> +               regs = get_irq_regs();
> >>>>>>>> +       if (!regs)
> >>>>>>>> +               regs = task_pt_regs(current);
> >>>>>>>> +
> >>>>>>>> +       local_irq_disable();
> >>>>>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >>>>>>>> +               crash_save_cpu(regs, cpu);
> >>>>>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
> >>>>>>>> +
> >>>>>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
> >>>>>>>> +               cpu_relax();
> >>>>>>>> +
> >>>>>>>> +       kexec_reboot();
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +/* Override the weak function in kernel/panic.c */
> >>>>>>>> +void crash_smp_send_stop(void)
> >>>>>>>> +{
> >>>>>>>> +       static int cpus_stopped;
> >>>>>>>> +       unsigned long timeout;
> >>>>>>>> +       unsigned int ncpus;
> >>>>>>>> +
> >>>>>>>> +       /*
> >>>>>>>> +        * This function can be called twice in panic path, but obviously
> >>>>>>>> +        * we execute this only once.
> >>>>>>>> +        */
> >>>>>>>> +       if (cpus_stopped)
> >>>>>>>> +               return;
> >>>>>>>> +
> >>>>>>>> +       cpus_stopped = 1;
> >>>>>>>> +
> >>>>>>>> +        /* Excluding the panic cpu */
> >>>>>>>> +       ncpus = num_online_cpus() - 1;
> >>>>>>>> +
> >>>>>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
> >>>>>>>> +       smp_wmb();
> >>>>>>>> +
> >>>>>>>> +       /*
> >>>>>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
> >>>>>>>> +        * respond. Delay of at least 10 seconds.
> >>>>>>>> +        */
> >>>>>>>> +       pr_emerg("Sending IPI to other cpus...\n");
> >>>>>>>> +       timeout = USEC_PER_SEC * 10;
> >>>>>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >>>>>>>> +               cpu_relax();
> >>>>>>>> +               udelay(1);
> >>>>>>>> +       }
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +#endif
> >>>>>>>> +
> >>>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>>>> +{
> >>>>>>>> +       int crashing_cpu;
> >>>>>>>> +
> >>>>>>>> +       local_irq_disable();
> >>>>>>>> +
> >>>>>>>> +       crashing_cpu = smp_processor_id();
> >>>>>>>> +       crash_save_cpu(regs, crashing_cpu);
> >>>>>>>> +
> >>>>>>>> +       /* shutdown non-crashing cpus */
> >>>>>>>> +       crash_smp_send_stop();
> >>>>>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >>>>>>>> +
> >>>>>>>> +       pr_info("Starting crashdump kernel...\n");
> >>>>>>>> +}
> >>>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >>>>>>>> new file mode 100644
> >>>>>>>> index 000000000000..13e5d2f7870d
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
> >>>>>>>> @@ -0,0 +1,19 @@
> >>>>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>>>> +#include <linux/highmem.h>
> >>>>>>>> +#include <linux/crash_dump.h>
> >>>>>>>> +#include <linux/io.h>
> >>>>>>>> +
> >>>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >>>>>>>> +                        size_t csize, unsigned long offset)
> >>>>>>>> +{
> >>>>>>>> +       void  *vaddr;
> >>>>>>>> +
> >>>>>>>> +       if (!csize)
> >>>>>>>> +               return 0;
> >>>>>>>> +
> >>>>>>>> +       vaddr = kmap_local_pfn(pfn);
> >>>>>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
> >>>>>>>> +       kunmap_local(vaddr);
> >>>>>>>> +
> >>>>>>>> +       return csize;
> >>>>>>>> +}
> >>>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >>>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >>>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
> >>>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
> >>>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>>>>>>>                 continue;
> >>>>>>>>         }
> >>>>>>>>
> >>>>>>>> -       /* kexec need a safe page to save reboot_code_buffer */
> >>>>>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>>>>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>>>>>>>
> >>>>>>>>         reboot_code_buffer =
> >>>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>>>>>>>
> >>>>>>>>         kexec_reboot();
> >>>>>>>>  }
> >>>>>>>> -
> >>>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>>>> -{
> >>>>>>>> -}
> >>>>>>>>  #endif
> >>>>>>>>
> >>>>>>>>  void machine_shutdown(void)
> >>>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>>>
> >>>>>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
> >>>>>>>>
> >>>>>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
> >>>>>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>>>>
> >>>>>>>>         /*
> >>>>>>>>          * The generic kexec code builds a page list with physical
> >>>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>>>
> >>>>>>>>         /*
> >>>>>>>>          * We know we were online, and there will be no incoming IPIs at
> >>>>>>>> -        * this point.
> >>>>>>>> +        * this point. Mark online again before rebooting so that the crash
> >>>>>>>> +        * analysis tool will see us correctly.
> >>>>>>>>          */
> >>>>>>>>         set_cpu_online(smp_processor_id(), true);
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >>>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
> >>>>>>>> --- a/arch/loongarch/kernel/mem.c
> >>>>>>>> +++ b/arch/loongarch/kernel/mem.c
> >>>>>>>> @@ -5,6 +5,7 @@
> >>>>>>>>  #include <linux/efi.h>
> >>>>>>>>  #include <linux/initrd.h>
> >>>>>>>>  #include <linux/memblock.h>
> >>>>>>>> +#include <linux/of_fdt.h>
> >>>>>>>>
> >>>>>>>>  #include <asm/bootinfo.h>
> >>>>>>>>  #include <asm/loongson.h>
> >>>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>>>>>>>
> >>>>>>>>         /* Reserve the initrd */
> >>>>>>>>         reserve_initrd_mem();
> >>>>>>>> +
> >>>>>>>> +       /* Mainly reserved memory for the elf core head */
> >>>>>>>> +       early_init_fdt_scan_reserved_mem();
> >>>>>>>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
> >>>>>>>> +       early_init_dt_check_for_usable_mem_range();
> >>>>>>>>  }
> >>>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >>>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
> >>>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>>>>>>>         move            s2, a2
> >>>>>>>>         move            s3, a3
> >>>>>>>>
> >>>>>>>> +       /*
> >>>>>>>> +        * In case of a kdump/crash kernel, the indirection page is not
> >>>>>>>> +        * populated as the kernel is directly copied to a reserved location
> >>>>>>>> +        */
> >>>>>>>> +       beqz            s2, done
> >>>>>>>> +
> >>>>>>>>  process_entry:
> >>>>>>>>         PTR_L           s4, s2, 0
> >>>>>>>>         PTR_ADDI        s2, s2, SZREG
> >>>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >>>>>>>> index f938aae3e92c..ea34b77e402f 100644
> >>>>>>>> --- a/arch/loongarch/kernel/setup.c
> >>>>>>>> +++ b/arch/loongarch/kernel/setup.c
> >>>>>>>> @@ -19,6 +19,8 @@
> >>>>>>>>  #include <linux/memblock.h>
> >>>>>>>>  #include <linux/initrd.h>
> >>>>>>>>  #include <linux/ioport.h>
> >>>>>>>> +#include <linux/kexec.h>
> >>>>>>>> +#include <linux/crash_dump.h>
> >>>>>>>>  #include <linux/root_dev.h>
> >>>>>>>>  #include <linux/console.h>
> >>>>>>>>  #include <linux/pfn.h>
> >>>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>>>>>>>  }
> >>>>>>>>  early_param("mem", early_parse_mem);
> >>>>>>>>
> >>>>>>>> +static void __init loongarch_parse_crashkernel(void)
> >>>>>>>> +{
> >>>>>>>> +#ifdef CONFIG_KEXEC
> >>>>>>>> +       unsigned long long start;
> >>>>>>>> +       unsigned long long total_mem;
> >>>>>>>> +       unsigned long long crash_size, crash_base;
> >>>>>>>> +       int ret;
> >>>>>>>> +
> >>>>>>>> +       total_mem = memblock_phys_mem_size();
> >>>>>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
> >>>>>>>> +                               &crash_size, &crash_base);
> >>>>>>>> +       if (ret != 0 || crash_size <= 0)
> >>>>>>>> +               return;
> >>>>>>>> +
> >>>>>>>> +
> >>>>>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >>>>>>>> +                                       crash_base + crash_size);
> >>>>>>>> +       if (start != crash_base) {
> >>>>>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
> >>>>>>>> +               return;
> >>>>>>>> +       }
> >>>>>>>> +
> >>>>>>>> +       crashk_res.start = crash_base;
> >>>>>>>> +       crashk_res.end   = crash_base + crash_size - 1;
> >>>>>>>> +#endif
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static void __init request_crashkernel(struct resource *res)
> >>>>>>>> +{
> >>>>>>>> +#ifdef CONFIG_KEXEC
> >>>>>>>> +       int ret;
> >>>>>>>> +
> >>>>>>>> +       if (crashk_res.start == crashk_res.end)
> >>>>>>>> +               return;
> >>>>>>>> +
> >>>>>>>> +       ret = request_resource(res, &crashk_res);
> >>>>>>>> +       if (!ret)
> >>>>>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >>>>>>>> +                       (unsigned long)((crashk_res.end -
> >>>>>>>> +                                        crashk_res.start + 1) >> 20),
> >>>>>>>> +                       (unsigned long)(crashk_res.start  >> 20));
> >>>>>>>> +#endif
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>>  void __init platform_init(void)
> >>>>>>>>  {
> >>>>>>>>         efi_init();
> >>>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>>>>>>>
> >>>>>>>>         check_kernel_sections_mem();
> >>>>>>>>
> >>>>>>>> +       loongarch_parse_crashkernel();
> >>>>>>>> +
> >>>>>>>>         /*
> >>>>>>>>          * In order to reduce the possibility of kernel panic when failed to
> >>>>>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >>>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>>>>>>>                 request_resource(res, &code_resource);
> >>>>>>>>                 request_resource(res, &data_resource);
> >>>>>>>>                 request_resource(res, &bss_resource);
> >>>>>>>> +               request_crashkernel(res);
> >>>>>>>>         }
> >>>>>>>>  }
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >>>>>>>> index aa1c95aaf595..0e610872f3f4 100644
> >>>>>>>> --- a/arch/loongarch/kernel/traps.c
> >>>>>>>> +++ b/arch/loongarch/kernel/traps.c
> >>>>>>>> @@ -10,6 +10,7 @@
> >>>>>>>>  #include <linux/entry-common.h>
> >>>>>>>>  #include <linux/init.h>
> >>>>>>>>  #include <linux/kernel.h>
> >>>>>>>> +#include <linux/kexec.h>
> >>>>>>>>  #include <linux/module.h>
> >>>>>>>>  #include <linux/extable.h>
> >>>>>>>>  #include <linux/mm.h>
> >>>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>>>>>>>
> >>>>>>>>         oops_exit();
> >>>>>>>>
> >>>>>>>> +       if (regs && kexec_should_crash(current))
> >>>>>>>> +               crash_kexec(regs);
> >>>>>>>> +
> >>>>>>>>         if (in_interrupt())
> >>>>>>>>                 panic("Fatal exception in interrupt");
> >>>>>>>>
> >>>>>>>> --
> >>>>>>>> 2.36.0
> >>>>>>>>
> >>>>>>
> >>>>
> >>
>
Re: [PATCH 2/3] LoongArch: Add kdump support
Posted by Youling Tang 3 years, 7 months ago
Hi, Huacai

On 09/05/2022 09:01 PM, Huacai Chen wrote:
> Hi, Youling,
>
> On Mon, Sep 5, 2022 at 3:45 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> Hi, Huacai
>>
>> On 09/05/2022 03:32 PM, Huacai Chen wrote:
>>> Hi, Youling,
>>>
>>> On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>>
>>>>
>>>> On 09/05/2022 10:14 AM, Huacai Chen wrote:
>>>>> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>
>>>>>> Hi, Huacai
>>>>>>
>>>>>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
>>>>>>> Hi, Youling,
>>>>>>>
>>>>>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>>>
>>>>>>>> Hi, Huacai
>>>>>>>>
>>>>>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>>>>>>>> Hi, Youling,
>>>>>>>>>
>>>>>>>>> I think crash.c can be merged into crash_dump.c
>>>>>>>>
>>>>>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>>>>>>>> I'm not sure if merging crash.c into crash_dump.c will break its
>>>>>>>> consistency?
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> Youling
>>>>>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
>>>>>>> can be merged into machine_kexec.c, as arm64 and riscv do.
>>>>>>
>>>>>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
>>>>>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
>>>>>> into machine_kexec.c, should crash_shutdown_secondary and
>>>>>> crash_smp_send_stop be placed in smp.c?
>>>>> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
>>>>
>>>> Ok, I'll merge all into machine_kexec.c.
>>>>
>>>> Youling.
>>> Another problem, 0x9000000091000000 for PHYSICAL_START is too tricky.
>>> If you want to skip the "low memory", maybe we can use
>>> 0x9000000090000000 or 0x90000000a0000000?
>>
>> Because there are many holes in our memory layout, if PHYSICAL_START is
>> set to 0x90000000a0000000, the largest reserved area of the crashkernel
>> will be 512M, beyond which it will fail.
> Then 0x9000000090000000 is not suitable, but I think 512M is enough?
> If so, let's use 0x90000000a0000000.

I'm not sure whether 512M is enough for server machines, but
I will change PHYSICAL_START to 0x90000000a0000000.

Youling.
>
> Huacai
>
> Huacai
>>
>> # cat /proc/iomem
>> 90400000-bfffffff : System RAM
>> c0020000-f9efffff : System RAM
>>    f6810000-f6813fff : Reserved
>>
>> The second System RAM starts at 0x90400000, so 0x9000000090000000 will
>> be too small.
>>
>> Youling.
>>
>>>
>>> Huacai
>>>>
>>>>>
>>>>> Huacai
>>>>>>
>>>>>> Youling.
>>>>>>>
>>>>>>> Huacai
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Huacai
>>>>>>>>>
>>>>>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>>>>>
>>>>>>>>>> This patch adds support for kdump, the kernel will reserve a region
>>>>>>>>>> for the crash kernel and jump there on panic.
>>>>>>>>>>
>>>>>>>>>> Arch-specific functions are added to allow for implementing a crash
>>>>>>>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>>>>>>>>>>
>>>>>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>>>>>>>> separate region for the core's ELF header within crash kdump kernel
>>>>>>>>>> memory and filling it in when executing kexec_load().
>>>>>>>>>>
>>>>>>>>>> Then, its location will be advertised to crash dump kernel via a new
>>>>>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>>>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>>>>>>>
>>>>>>>>>> At the same time, it will also limit the crash kdump kernel to the
>>>>>>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>>>>>>>>>> so as not to destroy the original kernel dump data.
>>>>>>>>>>
>>>>>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>>>>>>>> with copy_oldmem_page().
>>>>>>>>>>
>>>>>>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>>>>>>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
>>>>>>>>>> it by triggering a crash through /proc/sysrq-trigger:
>>>>>>>>>>
>>>>>>>>>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>>>>>>>  # echo c > /proc/sysrq-trigger
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>>>>>>>> ---
>>>>>>>>>>  arch/loongarch/Kconfig                  |  22 ++++++
>>>>>>>>>>  arch/loongarch/Makefile                 |   4 +
>>>>>>>>>>  arch/loongarch/kernel/Makefile          |   3 +-
>>>>>>>>>>  arch/loongarch/kernel/crash.c           | 100 ++++++++++++++++++++++++
>>>>>>>>>>  arch/loongarch/kernel/crash_dump.c      |  19 +++++
>>>>>>>>>>  arch/loongarch/kernel/machine_kexec.c   |  12 ++-
>>>>>>>>>>  arch/loongarch/kernel/mem.c             |   6 ++
>>>>>>>>>>  arch/loongarch/kernel/relocate_kernel.S |   6 ++
>>>>>>>>>>  arch/loongarch/kernel/setup.c           |  49 ++++++++++++
>>>>>>>>>>  arch/loongarch/kernel/traps.c           |   4 +
>>>>>>>>>>  10 files changed, 217 insertions(+), 8 deletions(-)
>>>>>>>>>>  create mode 100644 arch/loongarch/kernel/crash.c
>>>>>>>>>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>>>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>>>>>>>> --- a/arch/loongarch/Kconfig
>>>>>>>>>> +++ b/arch/loongarch/Kconfig
>>>>>>>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>>>>>>>
>>>>>>>>>>           The name comes from the similarity to the exec system call.
>>>>>>>>>>
>>>>>>>>>> +config CRASH_DUMP
>>>>>>>>>> +       bool "Build kdump crash kernel"
>>>>>>>>>> +       help
>>>>>>>>>> +         Generate crash dump after being started by kexec. This should
>>>>>>>>>> +         be normally only set in special crash dump kernels which are
>>>>>>>>>> +         loaded in the main kernel with kexec-tools into a specially
>>>>>>>>>> +         reserved region and then later executed after a crash by
>>>>>>>>>> +         kdump/kexec.
>>>>>>>>>> +
>>>>>>>>>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>>>>>>>>>> +
>>>>>>>>>> +config PHYSICAL_START
>>>>>>>>>> +       hex "Physical address where the kernel is loaded"
>>>>>>>>>> +       default "0x9000000091000000" if 64BIT
>>>>>>>>>> +       depends on CRASH_DUMP
>>>>>>>>>> +       help
>>>>>>>>>> +         This gives the XKPRANGE address where the kernel is loaded.
>>>>>>>>>> +         If you plan to use kernel for capturing the crash dump change
>>>>>>>>>> +         this value to start of the reserved region (the "X" value as
>>>>>>>>>> +         specified in the "crashkernel=YM@XM" command line boot parameter
>>>>>>>>>> +         passed to the panic-ed kernel).
>>>>>>>>>> +
>>>>>>>>>>  config SECCOMP
>>>>>>>>>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>>>>>>>>>         depends on PROC_FS
>>>>>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>>>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>>>>>>>> --- a/arch/loongarch/Makefile
>>>>>>>>>> +++ b/arch/loongarch/Makefile
>>>>>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE         += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>>>>>>>  cflags-y += -ffreestanding
>>>>>>>>>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>>>>>>>
>>>>>>>>>> +ifdef CONFIG_PHYSICAL_START
>>>>>>>>>> +load-y         = $(CONFIG_PHYSICAL_START)
>>>>>>>>>> +else
>>>>>>>>>>  load-y         = 0x9000000000200000
>>>>>>>>>> +endif
>>>>>>>>>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>>>>>>>
>>>>>>>>>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>>>>>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>>>>>>>> index 20b64ac3f128..df5aea129364 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/Makefile
>>>>>>>>>> +++ b/arch/loongarch/kernel/Makefile
>>>>>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU)     += fpu.o
>>>>>>>>>>  obj-$(CONFIG_MODULES)          += module.o module-sections.o
>>>>>>>>>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>>>>>>>>>
>>>>>>>>>> -obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>>>>>>>>>> +obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o crash.o
>>>>>>>>>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>>>>>>>>>
>>>>>>>>>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>>>>>>>> new file mode 100644
>>>>>>>>>> index 000000000000..b4f249ec6301
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/arch/loongarch/kernel/crash.c
>>>>>>>>>> @@ -0,0 +1,100 @@
>>>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>>>> +/*
>>>>>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>>>>>>>> + *
>>>>>>>>>> + * Derived from MIPS
>>>>>>>>>> + */
>>>>>>>>>> +#include <linux/kernel.h>
>>>>>>>>>> +#include <linux/smp.h>
>>>>>>>>>> +#include <linux/reboot.h>
>>>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>>> +#include <linux/delay.h>
>>>>>>>>>> +#include <linux/irq.h>
>>>>>>>>>> +#include <linux/types.h>
>>>>>>>>>> +#include <linux/sched.h>
>>>>>>>>>> +#include <linux/sched/task_stack.h>
>>>>>>>>>> +#include <asm/cacheflush.h>
>>>>>>>>>> +#include <asm/kexec.h>
>>>>>>>>>> +
>>>>>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>>>>>>>> +
>>>>>>>>>> +#ifdef CONFIG_SMP
>>>>>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>>>>>>>> +{
>>>>>>>>>> +       struct pt_regs *regs = passed_regs;
>>>>>>>>>> +       int cpu = smp_processor_id();
>>>>>>>>>> +
>>>>>>>>>> +       /*
>>>>>>>>>> +        * If we are passed registers, use those.  Otherwise get the
>>>>>>>>>> +        * regs from the last interrupt, which should be correct, as
>>>>>>>>>> +        * we are in an interrupt.  But if the regs are not there,
>>>>>>>>>> +        * pull them from the top of the stack.  They are probably
>>>>>>>>>> +        * wrong, but we need something to keep from crashing again.
>>>>>>>>>> +        */
>>>>>>>>>> +       if (!regs)
>>>>>>>>>> +               regs = get_irq_regs();
>>>>>>>>>> +       if (!regs)
>>>>>>>>>> +               regs = task_pt_regs(current);
>>>>>>>>>> +
>>>>>>>>>> +       local_irq_disable();
>>>>>>>>>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>>>>>>>> +               crash_save_cpu(regs, cpu);
>>>>>>>>>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>>>>>>>>>> +
>>>>>>>>>> +       while (!atomic_read(&kexec_ready_to_reboot))
>>>>>>>>>> +               cpu_relax();
>>>>>>>>>> +
>>>>>>>>>> +       kexec_reboot();
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +/* Override the weak function in kernel/panic.c */
>>>>>>>>>> +void crash_smp_send_stop(void)
>>>>>>>>>> +{
>>>>>>>>>> +       static int cpus_stopped;
>>>>>>>>>> +       unsigned long timeout;
>>>>>>>>>> +       unsigned int ncpus;
>>>>>>>>>> +
>>>>>>>>>> +       /*
>>>>>>>>>> +        * This function can be called twice in panic path, but obviously
>>>>>>>>>> +        * we execute this only once.
>>>>>>>>>> +        */
>>>>>>>>>> +       if (cpus_stopped)
>>>>>>>>>> +               return;
>>>>>>>>>> +
>>>>>>>>>> +       cpus_stopped = 1;
>>>>>>>>>> +
>>>>>>>>>> +        /* Excluding the panic cpu */
>>>>>>>>>> +       ncpus = num_online_cpus() - 1;
>>>>>>>>>> +
>>>>>>>>>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>>>>>>>> +       smp_wmb();
>>>>>>>>>> +
>>>>>>>>>> +       /*
>>>>>>>>>> +        * The crash CPU sends an IPI and wait for other CPUs to
>>>>>>>>>> +        * respond. Delay of at least 10 seconds.
>>>>>>>>>> +        */
>>>>>>>>>> +       pr_emerg("Sending IPI to other cpus...\n");
>>>>>>>>>> +       timeout = USEC_PER_SEC * 10;
>>>>>>>>>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>>>>>>>> +               cpu_relax();
>>>>>>>>>> +               udelay(1);
>>>>>>>>>> +       }
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +#endif
>>>>>>>>>> +
>>>>>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>>>> +{
>>>>>>>>>> +       int crashing_cpu;
>>>>>>>>>> +
>>>>>>>>>> +       local_irq_disable();
>>>>>>>>>> +
>>>>>>>>>> +       crashing_cpu = smp_processor_id();
>>>>>>>>>> +       crash_save_cpu(regs, crashing_cpu);
>>>>>>>>>> +
>>>>>>>>>> +       /* shutdown non-crashing cpus */
>>>>>>>>>> +       crash_smp_send_stop();
>>>>>>>>>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>>>>>>>> +
>>>>>>>>>> +       pr_info("Starting crashdump kernel...\n");
>>>>>>>>>> +}
>>>>>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>>>>>>>> new file mode 100644
>>>>>>>>>> index 000000000000..13e5d2f7870d
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>>>>>>>> @@ -0,0 +1,19 @@
>>>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>>>> +#include <linux/highmem.h>
>>>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>>> +#include <linux/io.h>
>>>>>>>>>> +
>>>>>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>>>>>>>> +                        size_t csize, unsigned long offset)
>>>>>>>>>> +{
>>>>>>>>>> +       void  *vaddr;
>>>>>>>>>> +
>>>>>>>>>> +       if (!csize)
>>>>>>>>>> +               return 0;
>>>>>>>>>> +
>>>>>>>>>> +       vaddr = kmap_local_pfn(pfn);
>>>>>>>>>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>>>>>>>>>> +       kunmap_local(vaddr);
>>>>>>>>>> +
>>>>>>>>>> +       return csize;
>>>>>>>>>> +}
>>>>>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>>>>>>>                 continue;
>>>>>>>>>>         }
>>>>>>>>>>
>>>>>>>>>> -       /* kexec need a safe page to save reboot_code_buffer */
>>>>>>>>>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>>>>>>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>>>>>>>
>>>>>>>>>>         reboot_code_buffer =
>>>>>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>>>>>>>
>>>>>>>>>>         kexec_reboot();
>>>>>>>>>>  }
>>>>>>>>>> -
>>>>>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>>>> -{
>>>>>>>>>> -}
>>>>>>>>>>  #endif
>>>>>>>>>>
>>>>>>>>>>  void machine_shutdown(void)
>>>>>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>>>
>>>>>>>>>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>>>>>>>
>>>>>>>>>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>>>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>>>>>>>>>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>>>>
>>>>>>>>>>         /*
>>>>>>>>>>          * The generic kexec code builds a page list with physical
>>>>>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>>>
>>>>>>>>>>         /*
>>>>>>>>>>          * We know we were online, and there will be no incoming IPIs at
>>>>>>>>>> -        * this point.
>>>>>>>>>> +        * this point. Mark online again before rebooting so that the crash
>>>>>>>>>> +        * analysis tool will see us correctly.
>>>>>>>>>>          */
>>>>>>>>>>         set_cpu_online(smp_processor_id(), true);
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>>>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/mem.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/mem.c
>>>>>>>>>> @@ -5,6 +5,7 @@
>>>>>>>>>>  #include <linux/efi.h>
>>>>>>>>>>  #include <linux/initrd.h>
>>>>>>>>>>  #include <linux/memblock.h>
>>>>>>>>>> +#include <linux/of_fdt.h>
>>>>>>>>>>
>>>>>>>>>>  #include <asm/bootinfo.h>
>>>>>>>>>>  #include <asm/loongson.h>
>>>>>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>>>>>>>
>>>>>>>>>>         /* Reserve the initrd */
>>>>>>>>>>         reserve_initrd_mem();
>>>>>>>>>> +
>>>>>>>>>> +       /* Mainly reserves memory for the ELF core header */
>>>>>>>>>> +       early_init_fdt_scan_reserved_mem();
>>>>>>>>>> +       /* Parse linux,usable-memory-range for the crash dump kernel */
>>>>>>>>>> +       early_init_dt_check_for_usable_mem_range();
>>>>>>>>>>  }
>>>>>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>>>>>>>         move            s2, a2
>>>>>>>>>>         move            s3, a3
>>>>>>>>>>
>>>>>>>>>> +       /*
>>>>>>>>>> +        * In case of a kdump/crash kernel, the indirection page is not
>>>>>>>>>> +        * populated as the kernel is directly copied to a reserved location
>>>>>>>>>> +        */
>>>>>>>>>> +       beqz            s2, done
>>>>>>>>>> +
>>>>>>>>>>  process_entry:
>>>>>>>>>>         PTR_L           s4, s2, 0
>>>>>>>>>>         PTR_ADDI        s2, s2, SZREG
>>>>>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>>>>>>>> index f938aae3e92c..ea34b77e402f 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/setup.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/setup.c
>>>>>>>>>> @@ -19,6 +19,8 @@
>>>>>>>>>>  #include <linux/memblock.h>
>>>>>>>>>>  #include <linux/initrd.h>
>>>>>>>>>>  #include <linux/ioport.h>
>>>>>>>>>> +#include <linux/kexec.h>
>>>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>>>  #include <linux/root_dev.h>
>>>>>>>>>>  #include <linux/console.h>
>>>>>>>>>>  #include <linux/pfn.h>
>>>>>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>>>>>>>  }
>>>>>>>>>>  early_param("mem", early_parse_mem);
>>>>>>>>>>
>>>>>>>>>> +static void __init loongarch_parse_crashkernel(void)
>>>>>>>>>> +{
>>>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>>>> +       unsigned long long start;
>>>>>>>>>> +       unsigned long long total_mem;
>>>>>>>>>> +       unsigned long long crash_size, crash_base;
>>>>>>>>>> +       int ret;
>>>>>>>>>> +
>>>>>>>>>> +       total_mem = memblock_phys_mem_size();
>>>>>>>>>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>>>>>>>>>> +                               &crash_size, &crash_base);
>>>>>>>>>> +       if (ret != 0 || crash_size <= 0)
>>>>>>>>>> +               return;
>>>>>>>>>> +
>>>>>>>>>> +
>>>>>>>>>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>>>>>>>> +                                       crash_base + crash_size);
>>>>>>>>>> +       if (start != crash_base) {
>>>>>>>>>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>>>>>>>>>> +               return;
>>>>>>>>>> +       }
>>>>>>>>>> +
>>>>>>>>>> +       crashk_res.start = crash_base;
>>>>>>>>>> +       crashk_res.end   = crash_base + crash_size - 1;
>>>>>>>>>> +#endif
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static void __init request_crashkernel(struct resource *res)
>>>>>>>>>> +{
>>>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>>>> +       int ret;
>>>>>>>>>> +
>>>>>>>>>> +       if (crashk_res.start == crashk_res.end)
>>>>>>>>>> +               return;
>>>>>>>>>> +
>>>>>>>>>> +       ret = request_resource(res, &crashk_res);
>>>>>>>>>> +       if (!ret)
>>>>>>>>>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>>>>>>>> +                       (unsigned long)((crashk_res.end -
>>>>>>>>>> +                                        crashk_res.start + 1) >> 20),
>>>>>>>>>> +                       (unsigned long)(crashk_res.start  >> 20));
>>>>>>>>>> +#endif
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  void __init platform_init(void)
>>>>>>>>>>  {
>>>>>>>>>>         efi_init();
>>>>>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>>>>>>>
>>>>>>>>>>         check_kernel_sections_mem();
>>>>>>>>>>
>>>>>>>>>> +       loongarch_parse_crashkernel();
>>>>>>>>>> +
>>>>>>>>>>         /*
>>>>>>>>>>          * In order to reduce the possibility of kernel panic when failed to
>>>>>>>>>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>>>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>>>>>>>                 request_resource(res, &code_resource);
>>>>>>>>>>                 request_resource(res, &data_resource);
>>>>>>>>>>                 request_resource(res, &bss_resource);
>>>>>>>>>> +               request_crashkernel(res);
>>>>>>>>>>         }
>>>>>>>>>>  }
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>>>>>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/traps.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/traps.c
>>>>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>>>>  #include <linux/entry-common.h>
>>>>>>>>>>  #include <linux/init.h>
>>>>>>>>>>  #include <linux/kernel.h>
>>>>>>>>>> +#include <linux/kexec.h>
>>>>>>>>>>  #include <linux/module.h>
>>>>>>>>>>  #include <linux/extable.h>
>>>>>>>>>>  #include <linux/mm.h>
>>>>>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>>>>>>>
>>>>>>>>>>         oops_exit();
>>>>>>>>>>
>>>>>>>>>> +       if (regs && kexec_should_crash(current))
>>>>>>>>>> +               crash_kexec(regs);
>>>>>>>>>> +
>>>>>>>>>>         if (in_interrupt())
>>>>>>>>>>                 panic("Fatal exception in interrupt");
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>> 2.36.0
>>>>>>>>>>
>>>>>>>>
>>>>>>
>>>>
>>