This patch adds support for kdump: the kernel will reserve a region
for the crash kernel and jump there on panic.
Arch-specific functions are added to allow for implementing a crash
dump file interface, /proc/vmcore, which can be viewed as an ELF file.
A user space tool, like kexec-tools, is responsible for allocating a
separate region for the core's ELF header within crash kdump kernel
memory and filling it in when executing kexec_load().
Then, its location will be advertised to crash dump kernel via a new
device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
the region for later use with fdt_reserve_elfcorehdr() at boot time.
At the same time, it will also limit the crash kdump kernel to the
crashkernel area via a new device-tree property, "linux,usable-memory-range",
so as not to destroy the original kernel dump data.
On crash dump kernel, /proc/vmcore will access the primary kernel's memory
with copy_oldmem_page().
I tested this on a LoongArch 3A5000 machine and it works as expected (the
suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test it by
triggering a crash through /proc/sysrq-trigger:
$ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
# echo c > /proc/sysrq-trigger
Signed-off-by: Youling Tang <tangyouling@loongson.cn>
---
arch/loongarch/Kconfig | 22 ++++++
arch/loongarch/Makefile | 4 +
arch/loongarch/kernel/Makefile | 3 +-
arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
arch/loongarch/kernel/crash_dump.c | 19 +++++
arch/loongarch/kernel/machine_kexec.c | 12 ++-
arch/loongarch/kernel/mem.c | 6 ++
arch/loongarch/kernel/relocate_kernel.S | 6 ++
arch/loongarch/kernel/setup.c | 49 ++++++++++++
arch/loongarch/kernel/traps.c | 4 +
10 files changed, 217 insertions(+), 8 deletions(-)
create mode 100644 arch/loongarch/kernel/crash.c
create mode 100644 arch/loongarch/kernel/crash_dump.c
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 903c82fa958d..7c1b07a5b5bd 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -420,6 +420,28 @@ config KEXEC
The name comes from the similarity to the exec system call.
+config CRASH_DUMP
+ bool "Build kdump crash kernel"
+ help
+ Generate a crash dump after being started by kexec. This should
+ normally only be set in special crash dump kernels, which are
+ loaded by the main kernel with kexec-tools into a specially
+ reserved region and then later executed after a crash by
+ kdump/kexec.
+
+ For more details see Documentation/admin-guide/kdump/kdump.rst
+
+config PHYSICAL_START
+ hex "Physical address where the kernel is loaded"
+ default "0x9000000091000000" if 64BIT
+ depends on CRASH_DUMP
+ help
+ This gives the XKPRANGE address where the kernel is loaded.
+ If you plan to use this kernel for capturing the crash dump,
+ change this value to the start of the reserved region (the "X"
+ value specified in the "crashkernel=YM@XM" boot command line
+ parameter passed to the panicked kernel).
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 4bc47f47cfd8..7dabd580426d 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
cflags-y += -ffreestanding
cflags-y += $(call cc-option, -mno-check-zero-division)
+ifdef CONFIG_PHYSICAL_START
+load-y = $(CONFIG_PHYSICAL_START)
+else
load-y = 0x9000000000200000
+endif
bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
drivers-$(CONFIG_PCI) += arch/loongarch/pci/
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 20b64ac3f128..df5aea129364 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
obj-$(CONFIG_MODULES) += module.o module-sections.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
new file mode 100644
index 000000000000..b4f249ec6301
--- /dev/null
+++ b/arch/loongarch/kernel/crash.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS
+ */
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <asm/cacheflush.h>
+#include <asm/kexec.h>
+
+static cpumask_t cpus_in_crash = CPU_MASK_NONE;
+
+#ifdef CONFIG_SMP
+static void crash_shutdown_secondary(void *passed_regs)
+{
+ struct pt_regs *regs = passed_regs;
+ int cpu = smp_processor_id();
+
+ /*
+ * Choose which registers to save: the ones passed in, if any
+ * (crash_smp_send_stop() passes NULL); otherwise the regs from
+ * the last interrupt, which should be correct as we are in an
+ * interrupt; failing that, task_pt_regs(current). Those are
+ * probably wrong, but we need something to keep from crashing again.
+ */
+ if (!regs)
+ regs = get_irq_regs();
+ if (!regs)
+ regs = task_pt_regs(current);
+
+ local_irq_disable();
+ if (!cpumask_test_cpu(cpu, &cpus_in_crash))
+ crash_save_cpu(regs, cpu);
+ cpumask_set_cpu(cpu, &cpus_in_crash);
+
+ while (!atomic_read(&kexec_ready_to_reboot))
+ cpu_relax();
+
+ kexec_reboot();
+}
+
+/* Override the weak function in kernel/panic.c */
+void crash_smp_send_stop(void)
+{
+ static int cpus_stopped;
+ unsigned long timeout;
+ unsigned int ncpus;
+
+ /*
+ * This function can be called twice in panic path, but obviously
+ * we execute this only once.
+ */
+ if (cpus_stopped)
+ return;
+
+ cpus_stopped = 1;
+
+ /* Excluding the panic cpu */
+ ncpus = num_online_cpus() - 1;
+
+ smp_call_function(crash_shutdown_secondary, NULL, 0);
+ smp_wmb();
+
+ /*
+ * The crash CPU has sent the IPI; poll until the other CPUs have
+ * checked in, giving up after 10 * USEC_PER_SEC polls of 1us each.
+ */
+ pr_emerg("Sending IPI to other cpus...\n");
+ timeout = USEC_PER_SEC * 10;
+ while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
+ cpu_relax();
+ udelay(1);
+ }
+}
+
+#endif
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ int crashing_cpu;
+
+ local_irq_disable();
+
+ crashing_cpu = smp_processor_id();
+ crash_save_cpu(regs, crashing_cpu);
+
+ /* Stop the non-crashing CPUs, then mark this CPU as in-crash too */
+ crash_smp_send_stop();
+ cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
+
+ pr_info("Starting crashdump kernel...\n");
+}
diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
new file mode 100644
index 000000000000..13e5d2f7870d
--- /dev/null
+++ b/arch/loongarch/kernel/crash_dump.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/highmem.h>
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+
+/* Copy csize bytes, starting at offset within old-kernel page pfn, to iter. */
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+			 size_t csize, unsigned long offset)
+{
+	void *page_va;
+	size_t copied = 0;
+
+	if (csize) {
+		page_va = kmap_local_pfn(pfn);
+		copied = copy_to_iter(page_va + offset, csize, iter);
+		kunmap_local(page_va);
+	}
+	return copied;
+}
diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
index 4ffcd4cd9c8c..f793a3ff09a3 100644
--- a/arch/loongarch/kernel/machine_kexec.c
+++ b/arch/loongarch/kernel/machine_kexec.c
@@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
continue;
}
- /* kexec need a safe page to save reboot_code_buffer */
+ /* kexec/kdump need a safe page to save reboot_code_buffer */
kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
reboot_code_buffer =
@@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
kexec_reboot();
}
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
-}
#endif
void machine_shutdown(void)
@@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
jump_addr = (unsigned long)phys_to_virt(image->start);
- first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
+ if (image->type == KEXEC_TYPE_DEFAULT)
+ first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
/*
* The generic kexec code builds a page list with physical
@@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
/*
* We know we were online, and there will be no incoming IPIs at
- * this point.
+ * this point. Mark online again before rebooting so that the crash
+ * analysis tool will see us correctly.
*/
set_cpu_online(smp_processor_id(), true);
diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
index 7423361b0ebc..c6def6ff81c8 100644
--- a/arch/loongarch/kernel/mem.c
+++ b/arch/loongarch/kernel/mem.c
@@ -5,6 +5,7 @@
#include <linux/efi.h>
#include <linux/initrd.h>
#include <linux/memblock.h>
+#include <linux/of_fdt.h>
#include <asm/bootinfo.h>
#include <asm/loongson.h>
@@ -61,4 +62,9 @@ void __init memblock_init(void)
/* Reserve the initrd */
reserve_initrd_mem();
+
+ /* Reserve FDT-described regions, mainly the ELF core header */
+ early_init_fdt_scan_reserved_mem();
+ /* Parse linux,usable-memory-range, which limits the crash dump kernel */
+ early_init_dt_check_for_usable_mem_range();
}
diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
index d1f242f74ea8..4ee5ac4ac2d7 100644
--- a/arch/loongarch/kernel/relocate_kernel.S
+++ b/arch/loongarch/kernel/relocate_kernel.S
@@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
move s2, a2
move s3, a3
+ /*
+ * In case of a kdump/crash kernel, the indirection page is not
+ * populated as the kernel is directly copied to a reserved location
+ */
+ beqz s2, done
+
process_entry:
PTR_L s4, s2, 0
PTR_ADDI s2, s2, SZREG
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index f938aae3e92c..ea34b77e402f 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -19,6 +19,8 @@
#include <linux/memblock.h>
#include <linux/initrd.h>
#include <linux/ioport.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/pfn.h>
@@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
}
early_param("mem", early_parse_mem);
+/* Reserve the "crashkernel=size@base" range and record it in crashk_res. */
+static void __init loongarch_parse_crashkernel(void)
+{
+#ifdef CONFIG_KEXEC
+	unsigned long long start;
+	unsigned long long total_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	total_mem = memblock_phys_mem_size();
+	ret = parse_crashkernel(boot_command_line, total_mem,
+				&crash_size, &crash_base);
+	if (ret != 0 || crash_size == 0)
+		return;
+
+	start = memblock_phys_alloc_range(crash_size, 1, crash_base,
+					  crash_base + crash_size);
+	/* 0 signals allocation failure, even if crash_base itself is 0 */
+	if (!start || start != crash_base) {
+		pr_warn("Invalid memory region reserved for crash kernel\n");
+		return;
+	}
+	crashk_res.start = crash_base;
+	crashk_res.end = crash_base + crash_size - 1;
+#endif
+}
+
+/* Register the crashkernel range under @res; no-op if none was reserved. */
+static void __init request_crashkernel(struct resource *res)
+{
+#ifdef CONFIG_KEXEC
+	int ret;
+
+	if (crashk_res.start == crashk_res.end)
+		return;
+
+	ret = request_resource(res, &crashk_res);
+	if (!ret)
+		pr_info("Reserving %luMB of memory at %luMB for crashkernel\n",
+			(unsigned long)(resource_size(&crashk_res) >> 20),
+			(unsigned long)(crashk_res.start >> 20));
+#endif
+}
+
void __init platform_init(void)
{
efi_init();
@@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
check_kernel_sections_mem();
+ loongarch_parse_crashkernel();
+
/*
* In order to reduce the possibility of kernel panic when failed to
* get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
@@ -290,6 +338,7 @@ static void __init resource_init(void)
request_resource(res, &code_resource);
request_resource(res, &data_resource);
request_resource(res, &bss_resource);
+ request_crashkernel(res);
}
}
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index aa1c95aaf595..0e610872f3f4 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -10,6 +10,7 @@
#include <linux/entry-common.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/kexec.h>
#include <linux/module.h>
#include <linux/extable.h>
#include <linux/mm.h>
@@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
oops_exit();
+ if (regs && kexec_should_crash(current))
+ crash_kexec(regs);
+
if (in_interrupt())
panic("Fatal exception in interrupt");
--
2.36.0
Hi, Youling,
I think crash.c can be merged into crash_dump.c
Huacai
On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>
> This patch adds support for kdump, the kernel will reserve a region
> for the crash kernel and jump there on panic.
>
> Arch-specific functions are added to allow for implementing a crash
> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
>
> A user space tool, like kexec-tools, is responsible for allocating a
> separate region for the core's ELF header within crash kdump kernel
> memory and filling it in when executing kexec_load().
>
> Then, its location will be advertised to crash dump kernel via a new
> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>
> At the same time, it will also limit the crash kdump kernel to the
> crashkernel area via a new device-tree property, "linux, usable-memory-range",
> so as not to destroy the original kernel dump data.
>
> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> with copy_oldmem_page().
>
> I tested this on LoongArch 3A5000 machine and works as expected (Suggest
> crashkernel parameter is "crashkernel=512M@2320M"), you may test it by
> triggering a crash through /proc/sysrq_trigger:
>
> $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> # echo c > /proc/sysrq_trigger
>
> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> ---
> arch/loongarch/Kconfig | 22 ++++++
> arch/loongarch/Makefile | 4 +
> arch/loongarch/kernel/Makefile | 3 +-
> arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
> arch/loongarch/kernel/crash_dump.c | 19 +++++
> arch/loongarch/kernel/machine_kexec.c | 12 ++-
> arch/loongarch/kernel/mem.c | 6 ++
> arch/loongarch/kernel/relocate_kernel.S | 6 ++
> arch/loongarch/kernel/setup.c | 49 ++++++++++++
> arch/loongarch/kernel/traps.c | 4 +
> 10 files changed, 217 insertions(+), 8 deletions(-)
> create mode 100644 arch/loongarch/kernel/crash.c
> create mode 100644 arch/loongarch/kernel/crash_dump.c
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 903c82fa958d..7c1b07a5b5bd 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -420,6 +420,28 @@ config KEXEC
>
> The name comes from the similarity to the exec system call.
>
> +config CRASH_DUMP
> + bool "Build kdump crash kernel"
> + help
> + Generate crash dump after being started by kexec. This should
> + be normally only set in special crash dump kernels which are
> + loaded in the main kernel with kexec-tools into a specially
> + reserved region and then later executed after a crash by
> + kdump/kexec.
> +
> + For more details see Documentation/admin-guide/kdump/kdump.rst
> +
> +config PHYSICAL_START
> + hex "Physical address where the kernel is loaded"
> + default "0x9000000091000000" if 64BIT
> + depends on CRASH_DUMP
> + help
> + This gives the XKPRANGE address where the kernel is loaded.
> + If you plan to use kernel for capturing the crash dump change
> + this value to start of the reserved region (the "X" value as
> + specified in the "crashkernel=YM@XM" command line boot parameter
> + passed to the panic-ed kernel).
> +
> config SECCOMP
> bool "Enable seccomp to safely compute untrusted bytecode"
> depends on PROC_FS
> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> index 4bc47f47cfd8..7dabd580426d 100644
> --- a/arch/loongarch/Makefile
> +++ b/arch/loongarch/Makefile
> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> cflags-y += -ffreestanding
> cflags-y += $(call cc-option, -mno-check-zero-division)
>
> +ifdef CONFIG_PHYSICAL_START
> +load-y = $(CONFIG_PHYSICAL_START)
> +else
> load-y = 0x9000000000200000
> +endif
> bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
>
> drivers-$(CONFIG_PCI) += arch/loongarch/pci/
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index 20b64ac3f128..df5aea129364 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
> obj-$(CONFIG_MODULES) += module.o module-sections.o
> obj-$(CONFIG_STACKTRACE) += stacktrace.o
>
> -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
> +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
>
> obj-$(CONFIG_PROC_FS) += proc.o
>
> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> new file mode 100644
> index 000000000000..b4f249ec6301
> --- /dev/null
> +++ b/arch/loongarch/kernel/crash.c
> @@ -0,0 +1,100 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> + *
> + * Derived from MIPS
> + */
> +#include <linux/kernel.h>
> +#include <linux/smp.h>
> +#include <linux/reboot.h>
> +#include <linux/crash_dump.h>
> +#include <linux/delay.h>
> +#include <linux/irq.h>
> +#include <linux/types.h>
> +#include <linux/sched.h>
> +#include <linux/sched/task_stack.h>
> +#include <asm/cacheflush.h>
> +#include <asm/kexec.h>
> +
> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> +
> +#ifdef CONFIG_SMP
> +static void crash_shutdown_secondary(void *passed_regs)
> +{
> + struct pt_regs *regs = passed_regs;
> + int cpu = smp_processor_id();
> +
> + /*
> + * If we are passed registers, use those. Otherwise get the
> + * regs from the last interrupt, which should be correct, as
> + * we are in an interrupt. But if the regs are not there,
> + * pull them from the top of the stack. They are probably
> + * wrong, but we need something to keep from crashing again.
> + */
> + if (!regs)
> + regs = get_irq_regs();
> + if (!regs)
> + regs = task_pt_regs(current);
> +
> + local_irq_disable();
> + if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> + crash_save_cpu(regs, cpu);
> + cpumask_set_cpu(cpu, &cpus_in_crash);
> +
> + while (!atomic_read(&kexec_ready_to_reboot))
> + cpu_relax();
> +
> + kexec_reboot();
> +}
> +
> +/* Override the weak function in kernel/panic.c */
> +void crash_smp_send_stop(void)
> +{
> + static int cpus_stopped;
> + unsigned long timeout;
> + unsigned int ncpus;
> +
> + /*
> + * This function can be called twice in panic path, but obviously
> + * we execute this only once.
> + */
> + if (cpus_stopped)
> + return;
> +
> + cpus_stopped = 1;
> +
> + /* Excluding the panic cpu */
> + ncpus = num_online_cpus() - 1;
> +
> + smp_call_function(crash_shutdown_secondary, NULL, 0);
> + smp_wmb();
> +
> + /*
> + * The crash CPU sends an IPI and wait for other CPUs to
> + * respond. Delay of at least 10 seconds.
> + */
> + pr_emerg("Sending IPI to other cpus...\n");
> + timeout = USEC_PER_SEC * 10;
> + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> + cpu_relax();
> + udelay(1);
> + }
> +}
> +
> +#endif
> +
> +void machine_crash_shutdown(struct pt_regs *regs)
> +{
> + int crashing_cpu;
> +
> + local_irq_disable();
> +
> + crashing_cpu = smp_processor_id();
> + crash_save_cpu(regs, crashing_cpu);
> +
> + /* shutdown non-crashing cpus */
> + crash_smp_send_stop();
> + cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> +
> + pr_info("Starting crashdump kernel...\n");
> +}
> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> new file mode 100644
> index 000000000000..13e5d2f7870d
> --- /dev/null
> +++ b/arch/loongarch/kernel/crash_dump.c
> @@ -0,0 +1,19 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/highmem.h>
> +#include <linux/crash_dump.h>
> +#include <linux/io.h>
> +
> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> + size_t csize, unsigned long offset)
> +{
> + void *vaddr;
> +
> + if (!csize)
> + return 0;
> +
> + vaddr = kmap_local_pfn(pfn);
> + csize = copy_to_iter(vaddr + offset, csize, iter);
> + kunmap_local(vaddr);
> +
> + return csize;
> +}
> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> --- a/arch/loongarch/kernel/machine_kexec.c
> +++ b/arch/loongarch/kernel/machine_kexec.c
> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> continue;
> }
>
> - /* kexec need a safe page to save reboot_code_buffer */
> + /* kexec/kdump need a safe page to save reboot_code_buffer */
> kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>
> reboot_code_buffer =
> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>
> kexec_reboot();
> }
> -
> -void machine_crash_shutdown(struct pt_regs *regs)
> -{
> -}
> #endif
>
> void machine_shutdown(void)
> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>
> jump_addr = (unsigned long)phys_to_virt(image->start);
>
> - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> + if (image->type == KEXEC_TYPE_DEFAULT)
> + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>
> /*
> * The generic kexec code builds a page list with physical
> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>
> /*
> * We know we were online, and there will be no incoming IPIs at
> - * this point.
> + * this point. Mark online again before rebooting so that the crash
> + * analysis tool will see us correctly.
> */
> set_cpu_online(smp_processor_id(), true);
>
> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> index 7423361b0ebc..c6def6ff81c8 100644
> --- a/arch/loongarch/kernel/mem.c
> +++ b/arch/loongarch/kernel/mem.c
> @@ -5,6 +5,7 @@
> #include <linux/efi.h>
> #include <linux/initrd.h>
> #include <linux/memblock.h>
> +#include <linux/of_fdt.h>
>
> #include <asm/bootinfo.h>
> #include <asm/loongson.h>
> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>
> /* Reserve the initrd */
> reserve_initrd_mem();
> +
> + /* Mainly reserved memory for the elf core head */
> + early_init_fdt_scan_reserved_mem();
> + /* Parse linux,usable-memory-range is for crash dump kernel */
> + early_init_dt_check_for_usable_mem_range();
> }
> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> index d1f242f74ea8..4ee5ac4ac2d7 100644
> --- a/arch/loongarch/kernel/relocate_kernel.S
> +++ b/arch/loongarch/kernel/relocate_kernel.S
> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> move s2, a2
> move s3, a3
>
> + /*
> + * In case of a kdump/crash kernel, the indirection page is not
> + * populated as the kernel is directly copied to a reserved location
> + */
> + beqz s2, done
> +
> process_entry:
> PTR_L s4, s2, 0
> PTR_ADDI s2, s2, SZREG
> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> index f938aae3e92c..ea34b77e402f 100644
> --- a/arch/loongarch/kernel/setup.c
> +++ b/arch/loongarch/kernel/setup.c
> @@ -19,6 +19,8 @@
> #include <linux/memblock.h>
> #include <linux/initrd.h>
> #include <linux/ioport.h>
> +#include <linux/kexec.h>
> +#include <linux/crash_dump.h>
> #include <linux/root_dev.h>
> #include <linux/console.h>
> #include <linux/pfn.h>
> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> }
> early_param("mem", early_parse_mem);
>
> +static void __init loongarch_parse_crashkernel(void)
> +{
> +#ifdef CONFIG_KEXEC
> + unsigned long long start;
> + unsigned long long total_mem;
> + unsigned long long crash_size, crash_base;
> + int ret;
> +
> + total_mem = memblock_phys_mem_size();
> + ret = parse_crashkernel(boot_command_line, total_mem,
> + &crash_size, &crash_base);
> + if (ret != 0 || crash_size <= 0)
> + return;
> +
> +
> + start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> + crash_base + crash_size);
> + if (start != crash_base) {
> + pr_warn("Invalid memory region reserved for crash kernel\n");
> + return;
> + }
> +
> + crashk_res.start = crash_base;
> + crashk_res.end = crash_base + crash_size - 1;
> +#endif
> +}
> +
> +static void __init request_crashkernel(struct resource *res)
> +{
> +#ifdef CONFIG_KEXEC
> + int ret;
> +
> + if (crashk_res.start == crashk_res.end)
> + return;
> +
> + ret = request_resource(res, &crashk_res);
> + if (!ret)
> + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> + (unsigned long)((crashk_res.end -
> + crashk_res.start + 1) >> 20),
> + (unsigned long)(crashk_res.start >> 20));
> +#endif
> +}
> +
> void __init platform_init(void)
> {
> efi_init();
> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>
> check_kernel_sections_mem();
>
> + loongarch_parse_crashkernel();
> +
> /*
> * In order to reduce the possibility of kernel panic when failed to
> * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> request_resource(res, &code_resource);
> request_resource(res, &data_resource);
> request_resource(res, &bss_resource);
> + request_crashkernel(res);
> }
> }
>
> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> index aa1c95aaf595..0e610872f3f4 100644
> --- a/arch/loongarch/kernel/traps.c
> +++ b/arch/loongarch/kernel/traps.c
> @@ -10,6 +10,7 @@
> #include <linux/entry-common.h>
> #include <linux/init.h>
> #include <linux/kernel.h>
> +#include <linux/kexec.h>
> #include <linux/module.h>
> #include <linux/extable.h>
> #include <linux/mm.h>
> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>
> oops_exit();
>
> + if (regs && kexec_should_crash(current))
> + crash_kexec(regs);
> +
> if (in_interrupt())
> panic("Fatal exception in interrupt");
>
> --
> 2.36.0
>
Hi, Huacai
On 09/04/2022 08:21 PM, Huacai Chen wrote:
> Hi, Youling,
>
> I think crash.c can be merged into crash_dump.c
Most architectures implement only copy_oldmem_page() in crash_dump.c,
so I'm not sure whether merging crash.c into crash_dump.c would break
that consistency.
Thanks,
Youling
>
> Huacai
>
> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> This patch adds support for kdump, the kernel will reserve a region
>> for the crash kernel and jump there on panic.
>>
>> Arch-specific functions are added to allow for implementing a crash
>> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
>>
>> A user space tool, like kexec-tools, is responsible for allocating a
>> separate region for the core's ELF header within crash kdump kernel
>> memory and filling it in when executing kexec_load().
>>
>> Then, its location will be advertised to crash dump kernel via a new
>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>
>> At the same time, it will also limit the crash kdump kernel to the
>> crashkernel area via a new device-tree property, "linux, usable-memory-range",
>> so as not to destroy the original kernel dump data.
>>
>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>> with copy_oldmem_page().
>>
>> I tested this on LoongArch 3A5000 machine and works as expected (Suggest
>> crashkernel parameter is "crashkernel=512M@2320M"), you may test it by
>> triggering a crash through /proc/sysrq_trigger:
>>
>> $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>> # echo c > /proc/sysrq_trigger
>>
>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>> ---
>> arch/loongarch/Kconfig | 22 ++++++
>> arch/loongarch/Makefile | 4 +
>> arch/loongarch/kernel/Makefile | 3 +-
>> arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
>> arch/loongarch/kernel/crash_dump.c | 19 +++++
>> arch/loongarch/kernel/machine_kexec.c | 12 ++-
>> arch/loongarch/kernel/mem.c | 6 ++
>> arch/loongarch/kernel/relocate_kernel.S | 6 ++
>> arch/loongarch/kernel/setup.c | 49 ++++++++++++
>> arch/loongarch/kernel/traps.c | 4 +
>> 10 files changed, 217 insertions(+), 8 deletions(-)
>> create mode 100644 arch/loongarch/kernel/crash.c
>> create mode 100644 arch/loongarch/kernel/crash_dump.c
>>
>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>> index 903c82fa958d..7c1b07a5b5bd 100644
>> --- a/arch/loongarch/Kconfig
>> +++ b/arch/loongarch/Kconfig
>> @@ -420,6 +420,28 @@ config KEXEC
>>
>> The name comes from the similarity to the exec system call.
>>
>> +config CRASH_DUMP
>> + bool "Build kdump crash kernel"
>> + help
>> + Generate crash dump after being started by kexec. This should
>> + be normally only set in special crash dump kernels which are
>> + loaded in the main kernel with kexec-tools into a specially
>> + reserved region and then later executed after a crash by
>> + kdump/kexec.
>> +
>> + For more details see Documentation/admin-guide/kdump/kdump.rst
>> +
>> +config PHYSICAL_START
>> + hex "Physical address where the kernel is loaded"
>> + default "0x9000000091000000" if 64BIT
>> + depends on CRASH_DUMP
>> + help
>> + This gives the XKPRANGE address where the kernel is loaded.
>> + If you plan to use kernel for capturing the crash dump change
>> + this value to start of the reserved region (the "X" value as
>> + specified in the "crashkernel=YM@XM" command line boot parameter
>> + passed to the panic-ed kernel).
>> +
>> config SECCOMP
>> bool "Enable seccomp to safely compute untrusted bytecode"
>> depends on PROC_FS
>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>> index 4bc47f47cfd8..7dabd580426d 100644
>> --- a/arch/loongarch/Makefile
>> +++ b/arch/loongarch/Makefile
>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>> cflags-y += -ffreestanding
>> cflags-y += $(call cc-option, -mno-check-zero-division)
>>
>> +ifdef CONFIG_PHYSICAL_START
>> +load-y = $(CONFIG_PHYSICAL_START)
>> +else
>> load-y = 0x9000000000200000
>> +endif
>> bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
>>
>> drivers-$(CONFIG_PCI) += arch/loongarch/pci/
>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>> index 20b64ac3f128..df5aea129364 100644
>> --- a/arch/loongarch/kernel/Makefile
>> +++ b/arch/loongarch/kernel/Makefile
>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
>> obj-$(CONFIG_MODULES) += module.o module-sections.o
>> obj-$(CONFIG_STACKTRACE) += stacktrace.o
>>
>> -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
>> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
>> +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
>>
>> obj-$(CONFIG_PROC_FS) += proc.o
>>
>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>> new file mode 100644
>> index 000000000000..b4f249ec6301
>> --- /dev/null
>> +++ b/arch/loongarch/kernel/crash.c
>> @@ -0,0 +1,100 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>> + *
>> + * Derived from MIPS
>> + */
>> +#include <linux/kernel.h>
>> +#include <linux/smp.h>
>> +#include <linux/reboot.h>
>> +#include <linux/crash_dump.h>
>> +#include <linux/delay.h>
>> +#include <linux/irq.h>
>> +#include <linux/types.h>
>> +#include <linux/sched.h>
>> +#include <linux/sched/task_stack.h>
>> +#include <asm/cacheflush.h>
>> +#include <asm/kexec.h>
>> +
>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>> +
>> +#ifdef CONFIG_SMP
>> +static void crash_shutdown_secondary(void *passed_regs)
>> +{
>> + struct pt_regs *regs = passed_regs;
>> + int cpu = smp_processor_id();
>> +
>> + /*
>> + * If we are passed registers, use those. Otherwise get the
>> + * regs from the last interrupt, which should be correct, as
>> + * we are in an interrupt. But if the regs are not there,
>> + * pull them from the top of the stack. They are probably
>> + * wrong, but we need something to keep from crashing again.
>> + */
>> + if (!regs)
>> + regs = get_irq_regs();
>> + if (!regs)
>> + regs = task_pt_regs(current);
>> +
>> + local_irq_disable();
>> + if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>> + crash_save_cpu(regs, cpu);
>> + cpumask_set_cpu(cpu, &cpus_in_crash);
>> +
>> + while (!atomic_read(&kexec_ready_to_reboot))
>> + cpu_relax();
>> +
>> + kexec_reboot();
>> +}
>> +
>> +/* Override the weak function in kernel/panic.c */
>> +void crash_smp_send_stop(void)
>> +{
>> + static int cpus_stopped;
>> + unsigned long timeout;
>> + unsigned int ncpus;
>> +
>> + /*
>> + * This function can be called twice in panic path, but obviously
>> + * we execute this only once.
>> + */
>> + if (cpus_stopped)
>> + return;
>> +
>> + cpus_stopped = 1;
>> +
>> + /* Excluding the panic cpu */
>> + ncpus = num_online_cpus() - 1;
>> +
>> + smp_call_function(crash_shutdown_secondary, NULL, 0);
>> + smp_wmb();
>> +
>> + /*
>> + * The crash CPU sends an IPI and wait for other CPUs to
>> + * respond. Delay of at least 10 seconds.
>> + */
>> + pr_emerg("Sending IPI to other cpus...\n");
>> + timeout = USEC_PER_SEC * 10;
>> + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>> + cpu_relax();
>> + udelay(1);
>> + }
>> +}
>> +
>> +#endif
>> +
>> +void machine_crash_shutdown(struct pt_regs *regs)
>> +{
>> + int crashing_cpu;
>> +
>> + local_irq_disable();
>> +
>> + crashing_cpu = smp_processor_id();
>> + crash_save_cpu(regs, crashing_cpu);
>> +
>> + /* shutdown non-crashing cpus */
>> + crash_smp_send_stop();
>> + cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>> +
>> + pr_info("Starting crashdump kernel...\n");
>> +}
>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>> new file mode 100644
>> index 000000000000..13e5d2f7870d
>> --- /dev/null
>> +++ b/arch/loongarch/kernel/crash_dump.c
>> @@ -0,0 +1,19 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include <linux/highmem.h>
>> +#include <linux/crash_dump.h>
>> +#include <linux/io.h>
>> +
>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>> + size_t csize, unsigned long offset)
>> +{
>> + void *vaddr;
>> +
>> + if (!csize)
>> + return 0;
>> +
>> + vaddr = kmap_local_pfn(pfn);
>> + csize = copy_to_iter(vaddr + offset, csize, iter);
>> + kunmap_local(vaddr);
>> +
>> + return csize;
>> +}
>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>> --- a/arch/loongarch/kernel/machine_kexec.c
>> +++ b/arch/loongarch/kernel/machine_kexec.c
>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>> continue;
>> }
>>
>> - /* kexec need a safe page to save reboot_code_buffer */
>> + /* kexec/kdump need a safe page to save reboot_code_buffer */
>> kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>
>> reboot_code_buffer =
>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>
>> kexec_reboot();
>> }
>> -
>> -void machine_crash_shutdown(struct pt_regs *regs)
>> -{
>> -}
>> #endif
>>
>> void machine_shutdown(void)
>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>
>> jump_addr = (unsigned long)phys_to_virt(image->start);
>>
>> - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>> + if (image->type == KEXEC_TYPE_DEFAULT)
>> + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>
>> /*
>> * The generic kexec code builds a page list with physical
>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>
>> /*
>> * We know we were online, and there will be no incoming IPIs at
>> - * this point.
>> + * this point. Mark online again before rebooting so that the crash
>> + * analysis tool will see us correctly.
>> */
>> set_cpu_online(smp_processor_id(), true);
>>
>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>> index 7423361b0ebc..c6def6ff81c8 100644
>> --- a/arch/loongarch/kernel/mem.c
>> +++ b/arch/loongarch/kernel/mem.c
>> @@ -5,6 +5,7 @@
>> #include <linux/efi.h>
>> #include <linux/initrd.h>
>> #include <linux/memblock.h>
>> +#include <linux/of_fdt.h>
>>
>> #include <asm/bootinfo.h>
>> #include <asm/loongson.h>
>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>
>> /* Reserve the initrd */
>> reserve_initrd_mem();
>> +
>> + /* Mainly reserved memory for the elf core head */
>> + early_init_fdt_scan_reserved_mem();
>> + /* Parse linux,usable-memory-range is for crash dump kernel */
>> + early_init_dt_check_for_usable_mem_range();
>> }
>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>> --- a/arch/loongarch/kernel/relocate_kernel.S
>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>> move s2, a2
>> move s3, a3
>>
>> + /*
>> + * In case of a kdump/crash kernel, the indirection page is not
>> + * populated as the kernel is directly copied to a reserved location
>> + */
>> + beqz s2, done
>> +
>> process_entry:
>> PTR_L s4, s2, 0
>> PTR_ADDI s2, s2, SZREG
>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>> index f938aae3e92c..ea34b77e402f 100644
>> --- a/arch/loongarch/kernel/setup.c
>> +++ b/arch/loongarch/kernel/setup.c
>> @@ -19,6 +19,8 @@
>> #include <linux/memblock.h>
>> #include <linux/initrd.h>
>> #include <linux/ioport.h>
>> +#include <linux/kexec.h>
>> +#include <linux/crash_dump.h>
>> #include <linux/root_dev.h>
>> #include <linux/console.h>
>> #include <linux/pfn.h>
>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>> }
>> early_param("mem", early_parse_mem);
>>
>> +static void __init loongarch_parse_crashkernel(void)
>> +{
>> +#ifdef CONFIG_KEXEC
>> + unsigned long long start;
>> + unsigned long long total_mem;
>> + unsigned long long crash_size, crash_base;
>> + int ret;
>> +
>> + total_mem = memblock_phys_mem_size();
>> + ret = parse_crashkernel(boot_command_line, total_mem,
>> + &crash_size, &crash_base);
>> + if (ret != 0 || crash_size <= 0)
>> + return;
>> +
>> +
>> + start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>> + crash_base + crash_size);
>> + if (start != crash_base) {
>> + pr_warn("Invalid memory region reserved for crash kernel\n");
>> + return;
>> + }
>> +
>> + crashk_res.start = crash_base;
>> + crashk_res.end = crash_base + crash_size - 1;
>> +#endif
>> +}
>> +
>> +static void __init request_crashkernel(struct resource *res)
>> +{
>> +#ifdef CONFIG_KEXEC
>> + int ret;
>> +
>> + if (crashk_res.start == crashk_res.end)
>> + return;
>> +
>> + ret = request_resource(res, &crashk_res);
>> + if (!ret)
>> + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>> + (unsigned long)((crashk_res.end -
>> + crashk_res.start + 1) >> 20),
>> + (unsigned long)(crashk_res.start >> 20));
>> +#endif
>> +}
>> +
>> void __init platform_init(void)
>> {
>> efi_init();
>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>
>> check_kernel_sections_mem();
>>
>> + loongarch_parse_crashkernel();
>> +
>> /*
>> * In order to reduce the possibility of kernel panic when failed to
>> * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>> request_resource(res, &code_resource);
>> request_resource(res, &data_resource);
>> request_resource(res, &bss_resource);
>> + request_crashkernel(res);
>> }
>> }
>>
>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>> index aa1c95aaf595..0e610872f3f4 100644
>> --- a/arch/loongarch/kernel/traps.c
>> +++ b/arch/loongarch/kernel/traps.c
>> @@ -10,6 +10,7 @@
>> #include <linux/entry-common.h>
>> #include <linux/init.h>
>> #include <linux/kernel.h>
>> +#include <linux/kexec.h>
>> #include <linux/module.h>
>> #include <linux/extable.h>
>> #include <linux/mm.h>
>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>
>> oops_exit();
>>
>> + if (regs && kexec_should_crash(current))
>> + crash_kexec(regs);
>> +
>> if (in_interrupt())
>> panic("Fatal exception in interrupt");
>>
>> --
>> 2.36.0
>>
Hi, Youling,
On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Huacai
>
> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> > Hi, Youling,
> >
> > I think crash.c can be merged into crash_dump.c
>
> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> I'm not sure if merging crash.c into crash_dump.c will break its
> consistency?
>
> Thanks,
> Youling
Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
can be merged into machine_kexec.c, as arm64 and riscv do.
Huacai
>
> >
> > Huacai
> >
> > On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >> This patch adds support for kdump, the kernel will reserve a region
> >> for the crash kernel and jump there on panic.
> >>
> >> Arch-specific functions are added to allow for implementing a crash
> > >> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
> >>
> >> A user space tool, like kexec-tools, is responsible for allocating a
> >> separate region for the core's ELF header within crash kdump kernel
> >> memory and filling it in when executing kexec_load().
> >>
> >> Then, its location will be advertised to crash dump kernel via a new
> >> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>
> >> At the same time, it will also limit the crash kdump kernel to the
> > >> crashkernel area via a new device-tree property, "linux,usable-memory-range",
> >> so as not to destroy the original kernel dump data.
> >>
> >> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >> with copy_oldmem_page().
> >>
> > >> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> > >> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
> > >> it by triggering a crash through /proc/sysrq-trigger:
> >>
> >> $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> > >> # echo c > /proc/sysrq-trigger
> >>
> >> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >> ---
> >> arch/loongarch/Kconfig | 22 ++++++
> >> arch/loongarch/Makefile | 4 +
> >> arch/loongarch/kernel/Makefile | 3 +-
> >> arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
> >> arch/loongarch/kernel/crash_dump.c | 19 +++++
> >> arch/loongarch/kernel/machine_kexec.c | 12 ++-
> >> arch/loongarch/kernel/mem.c | 6 ++
> >> arch/loongarch/kernel/relocate_kernel.S | 6 ++
> >> arch/loongarch/kernel/setup.c | 49 ++++++++++++
> >> arch/loongarch/kernel/traps.c | 4 +
> >> 10 files changed, 217 insertions(+), 8 deletions(-)
> >> create mode 100644 arch/loongarch/kernel/crash.c
> >> create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>
> >> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >> index 903c82fa958d..7c1b07a5b5bd 100644
> >> --- a/arch/loongarch/Kconfig
> >> +++ b/arch/loongarch/Kconfig
> >> @@ -420,6 +420,28 @@ config KEXEC
> >>
> >> The name comes from the similarity to the exec system call.
> >>
> >> +config CRASH_DUMP
> >> + bool "Build kdump crash kernel"
> >> + help
> >> + Generate crash dump after being started by kexec. This should
> >> + be normally only set in special crash dump kernels which are
> >> + loaded in the main kernel with kexec-tools into a specially
> >> + reserved region and then later executed after a crash by
> >> + kdump/kexec.
> >> +
> >> + For more details see Documentation/admin-guide/kdump/kdump.rst
> >> +
> >> +config PHYSICAL_START
> >> + hex "Physical address where the kernel is loaded"
> >> + default "0x9000000091000000" if 64BIT
> >> + depends on CRASH_DUMP
> >> + help
> >> + This gives the XKPRANGE address where the kernel is loaded.
> >> + If you plan to use kernel for capturing the crash dump change
> >> + this value to start of the reserved region (the "X" value as
> >> + specified in the "crashkernel=YM@XM" command line boot parameter
> >> + passed to the panic-ed kernel).
> >> +
> >> config SECCOMP
> >> bool "Enable seccomp to safely compute untrusted bytecode"
> >> depends on PROC_FS
> >> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >> index 4bc47f47cfd8..7dabd580426d 100644
> >> --- a/arch/loongarch/Makefile
> >> +++ b/arch/loongarch/Makefile
> >> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >> cflags-y += -ffreestanding
> >> cflags-y += $(call cc-option, -mno-check-zero-division)
> >>
> >> +ifdef CONFIG_PHYSICAL_START
> >> +load-y = $(CONFIG_PHYSICAL_START)
> >> +else
> >> load-y = 0x9000000000200000
> >> +endif
> >> bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>
> >> drivers-$(CONFIG_PCI) += arch/loongarch/pci/
> >> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >> index 20b64ac3f128..df5aea129364 100644
> >> --- a/arch/loongarch/kernel/Makefile
> >> +++ b/arch/loongarch/kernel/Makefile
> >> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
> >> obj-$(CONFIG_MODULES) += module.o module-sections.o
> >> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> >>
> >> -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
> >> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
> >> +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
> >>
> >> obj-$(CONFIG_PROC_FS) += proc.o
> >>
> >> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >> new file mode 100644
> >> index 000000000000..b4f249ec6301
> >> --- /dev/null
> >> +++ b/arch/loongarch/kernel/crash.c
> >> @@ -0,0 +1,100 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +/*
> >> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >> + *
> >> + * Derived from MIPS
> >> + */
> >> +#include <linux/kernel.h>
> >> +#include <linux/smp.h>
> >> +#include <linux/reboot.h>
> >> +#include <linux/crash_dump.h>
> >> +#include <linux/delay.h>
> >> +#include <linux/irq.h>
> >> +#include <linux/types.h>
> >> +#include <linux/sched.h>
> >> +#include <linux/sched/task_stack.h>
> >> +#include <asm/cacheflush.h>
> >> +#include <asm/kexec.h>
> >> +
> >> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >> +
> >> +#ifdef CONFIG_SMP
> >> +static void crash_shutdown_secondary(void *passed_regs)
> >> +{
> >> + struct pt_regs *regs = passed_regs;
> >> + int cpu = smp_processor_id();
> >> +
> >> + /*
> >> + * If we are passed registers, use those. Otherwise get the
> >> + * regs from the last interrupt, which should be correct, as
> >> + * we are in an interrupt. But if the regs are not there,
> >> + * pull them from the top of the stack. They are probably
> >> + * wrong, but we need something to keep from crashing again.
> >> + */
> >> + if (!regs)
> >> + regs = get_irq_regs();
> >> + if (!regs)
> >> + regs = task_pt_regs(current);
> >> +
> >> + local_irq_disable();
> >> + if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >> + crash_save_cpu(regs, cpu);
> >> + cpumask_set_cpu(cpu, &cpus_in_crash);
> >> +
> >> + while (!atomic_read(&kexec_ready_to_reboot))
> >> + cpu_relax();
> >> +
> >> + kexec_reboot();
> >> +}
> >> +
> >> +/* Override the weak function in kernel/panic.c */
> >> +void crash_smp_send_stop(void)
> >> +{
> >> + static int cpus_stopped;
> >> + unsigned long timeout;
> >> + unsigned int ncpus;
> >> +
> >> + /*
> >> + * This function can be called twice in panic path, but obviously
> >> + * we execute this only once.
> >> + */
> >> + if (cpus_stopped)
> >> + return;
> >> +
> >> + cpus_stopped = 1;
> >> +
> >> + /* Excluding the panic cpu */
> >> + ncpus = num_online_cpus() - 1;
> >> +
> >> + smp_call_function(crash_shutdown_secondary, NULL, 0);
> >> + smp_wmb();
> >> +
> >> + /*
> >> + * The crash CPU sends an IPI and wait for other CPUs to
> >> + * respond. Delay of at least 10 seconds.
> >> + */
> >> + pr_emerg("Sending IPI to other cpus...\n");
> >> + timeout = USEC_PER_SEC * 10;
> >> + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >> + cpu_relax();
> >> + udelay(1);
> >> + }
> >> +}
> >> +
> >> +#endif
> >> +
> >> +void machine_crash_shutdown(struct pt_regs *regs)
> >> +{
> >> + int crashing_cpu;
> >> +
> >> + local_irq_disable();
> >> +
> >> + crashing_cpu = smp_processor_id();
> >> + crash_save_cpu(regs, crashing_cpu);
> >> +
> >> + /* shutdown non-crashing cpus */
> >> + crash_smp_send_stop();
> >> + cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >> +
> >> + pr_info("Starting crashdump kernel...\n");
> >> +}
> >> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >> new file mode 100644
> >> index 000000000000..13e5d2f7870d
> >> --- /dev/null
> >> +++ b/arch/loongarch/kernel/crash_dump.c
> >> @@ -0,0 +1,19 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +#include <linux/highmem.h>
> >> +#include <linux/crash_dump.h>
> >> +#include <linux/io.h>
> >> +
> >> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >> + size_t csize, unsigned long offset)
> >> +{
> >> + void *vaddr;
> >> +
> >> + if (!csize)
> >> + return 0;
> >> +
> >> + vaddr = kmap_local_pfn(pfn);
> >> + csize = copy_to_iter(vaddr + offset, csize, iter);
> >> + kunmap_local(vaddr);
> >> +
> >> + return csize;
> >> +}
> >> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >> --- a/arch/loongarch/kernel/machine_kexec.c
> >> +++ b/arch/loongarch/kernel/machine_kexec.c
> >> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >> continue;
> >> }
> >>
> >> - /* kexec need a safe page to save reboot_code_buffer */
> >> + /* kexec/kdump need a safe page to save reboot_code_buffer */
> >> kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>
> >> reboot_code_buffer =
> >> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>
> >> kexec_reboot();
> >> }
> >> -
> >> -void machine_crash_shutdown(struct pt_regs *regs)
> >> -{
> >> -}
> >> #endif
> >>
> >> void machine_shutdown(void)
> >> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>
> >> jump_addr = (unsigned long)phys_to_virt(image->start);
> >>
> >> - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >> + if (image->type == KEXEC_TYPE_DEFAULT)
> >> + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>
> >> /*
> >> * The generic kexec code builds a page list with physical
> >> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>
> >> /*
> >> * We know we were online, and there will be no incoming IPIs at
> >> - * this point.
> >> + * this point. Mark online again before rebooting so that the crash
> >> + * analysis tool will see us correctly.
> >> */
> >> set_cpu_online(smp_processor_id(), true);
> >>
> >> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >> index 7423361b0ebc..c6def6ff81c8 100644
> >> --- a/arch/loongarch/kernel/mem.c
> >> +++ b/arch/loongarch/kernel/mem.c
> >> @@ -5,6 +5,7 @@
> >> #include <linux/efi.h>
> >> #include <linux/initrd.h>
> >> #include <linux/memblock.h>
> >> +#include <linux/of_fdt.h>
> >>
> >> #include <asm/bootinfo.h>
> >> #include <asm/loongson.h>
> >> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>
> >> /* Reserve the initrd */
> >> reserve_initrd_mem();
> >> +
> >> + /* Mainly reserved memory for the elf core head */
> >> + early_init_fdt_scan_reserved_mem();
> >> + /* Parse linux,usable-memory-range is for crash dump kernel */
> >> + early_init_dt_check_for_usable_mem_range();
> >> }
> >> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >> --- a/arch/loongarch/kernel/relocate_kernel.S
> >> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >> move s2, a2
> >> move s3, a3
> >>
> >> + /*
> >> + * In case of a kdump/crash kernel, the indirection page is not
> >> + * populated as the kernel is directly copied to a reserved location
> >> + */
> >> + beqz s2, done
> >> +
> >> process_entry:
> >> PTR_L s4, s2, 0
> >> PTR_ADDI s2, s2, SZREG
> >> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >> index f938aae3e92c..ea34b77e402f 100644
> >> --- a/arch/loongarch/kernel/setup.c
> >> +++ b/arch/loongarch/kernel/setup.c
> >> @@ -19,6 +19,8 @@
> >> #include <linux/memblock.h>
> >> #include <linux/initrd.h>
> >> #include <linux/ioport.h>
> >> +#include <linux/kexec.h>
> >> +#include <linux/crash_dump.h>
> >> #include <linux/root_dev.h>
> >> #include <linux/console.h>
> >> #include <linux/pfn.h>
> >> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >> }
> >> early_param("mem", early_parse_mem);
> >>
> >> +static void __init loongarch_parse_crashkernel(void)
> >> +{
> >> +#ifdef CONFIG_KEXEC
> >> + unsigned long long start;
> >> + unsigned long long total_mem;
> >> + unsigned long long crash_size, crash_base;
> >> + int ret;
> >> +
> >> + total_mem = memblock_phys_mem_size();
> >> + ret = parse_crashkernel(boot_command_line, total_mem,
> >> + &crash_size, &crash_base);
> >> + if (ret != 0 || crash_size <= 0)
> >> + return;
> >> +
> >> +
> >> + start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >> + crash_base + crash_size);
> >> + if (start != crash_base) {
> >> + pr_warn("Invalid memory region reserved for crash kernel\n");
> >> + return;
> >> + }
> >> +
> >> + crashk_res.start = crash_base;
> >> + crashk_res.end = crash_base + crash_size - 1;
> >> +#endif
> >> +}
> >> +
> >> +static void __init request_crashkernel(struct resource *res)
> >> +{
> >> +#ifdef CONFIG_KEXEC
> >> + int ret;
> >> +
> >> + if (crashk_res.start == crashk_res.end)
> >> + return;
> >> +
> >> + ret = request_resource(res, &crashk_res);
> >> + if (!ret)
> >> + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >> + (unsigned long)((crashk_res.end -
> >> + crashk_res.start + 1) >> 20),
> >> + (unsigned long)(crashk_res.start >> 20));
> >> +#endif
> >> +}
> >> +
> >> void __init platform_init(void)
> >> {
> >> efi_init();
> >> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>
> >> check_kernel_sections_mem();
> >>
> >> + loongarch_parse_crashkernel();
> >> +
> >> /*
> >> * In order to reduce the possibility of kernel panic when failed to
> >> * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >> request_resource(res, &code_resource);
> >> request_resource(res, &data_resource);
> >> request_resource(res, &bss_resource);
> >> + request_crashkernel(res);
> >> }
> >> }
> >>
> >> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >> index aa1c95aaf595..0e610872f3f4 100644
> >> --- a/arch/loongarch/kernel/traps.c
> >> +++ b/arch/loongarch/kernel/traps.c
> >> @@ -10,6 +10,7 @@
> >> #include <linux/entry-common.h>
> >> #include <linux/init.h>
> >> #include <linux/kernel.h>
> >> +#include <linux/kexec.h>
> >> #include <linux/module.h>
> >> #include <linux/extable.h>
> >> #include <linux/mm.h>
> >> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>
> >> oops_exit();
> >>
> >> + if (regs && kexec_should_crash(current))
> >> + crash_kexec(regs);
> >> +
> >> if (in_interrupt())
> >> panic("Fatal exception in interrupt");
> >>
> >> --
> >> 2.36.0
> >>
>
Hi, Huacai
On 09/05/2022 09:38 AM, Huacai Chen wrote:
> Hi, Youling,
>
> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> Hi, Huacai
>>
>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>> Hi, Youling,
>>>
>>> I think crash.c can be merged into crash_dump.c
>>
>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>> I'm not sure if merging crash.c into crash_dump.c will break its
>> consistency?
>>
>> Thanks,
>> Youling
> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> can be merged into machine_kexec.c, as arm64 and riscv do.
For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
crash_smp_send_stop() is placed in smp.c. If crash.c is to be merged
into machine_kexec.c, should crash_shutdown_secondary() and
crash_smp_send_stop() be placed in smp.c instead?
Youling.
>
> Huacai
>>
>>>
>>> Huacai
>>>
>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>> This patch adds support for kdump, the kernel will reserve a region
>>>> for the crash kernel and jump there on panic.
>>>>
>>>> Arch-specific functions are added to allow for implementing a crash
>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>>>>
>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>> separate region for the core's ELF header within crash kdump kernel
>>>> memory and filling it in when executing kexec_load().
>>>>
>>>> Then, its location will be advertised to crash dump kernel via a new
>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>
>>>> At the same time, it will also limit the crash kdump kernel to the
>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>>>> so as not to destroy the original kernel dump data.
>>>>
>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>> with copy_oldmem_page().
>>>>
>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
>>>> it by triggering a crash through /proc/sysrq-trigger:
>>>>
>>>> $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>> # echo c > /proc/sysrq-trigger
>>>>
>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>> ---
>>>> arch/loongarch/Kconfig | 22 ++++++
>>>> arch/loongarch/Makefile | 4 +
>>>> arch/loongarch/kernel/Makefile | 3 +-
>>>> arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
>>>> arch/loongarch/kernel/crash_dump.c | 19 +++++
>>>> arch/loongarch/kernel/machine_kexec.c | 12 ++-
>>>> arch/loongarch/kernel/mem.c | 6 ++
>>>> arch/loongarch/kernel/relocate_kernel.S | 6 ++
>>>> arch/loongarch/kernel/setup.c | 49 ++++++++++++
>>>> arch/loongarch/kernel/traps.c | 4 +
>>>> 10 files changed, 217 insertions(+), 8 deletions(-)
>>>> create mode 100644 arch/loongarch/kernel/crash.c
>>>> create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>
>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>> --- a/arch/loongarch/Kconfig
>>>> +++ b/arch/loongarch/Kconfig
>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>
>>>> The name comes from the similarity to the exec system call.
>>>>
>>>> +config CRASH_DUMP
>>>> + bool "Build kdump crash kernel"
>>>> + help
>>>> + Generate crash dump after being started by kexec. This should
>>>> + be normally only set in special crash dump kernels which are
>>>> + loaded in the main kernel with kexec-tools into a specially
>>>> + reserved region and then later executed after a crash by
>>>> + kdump/kexec.
>>>> +
>>>> + For more details see Documentation/admin-guide/kdump/kdump.rst
>>>> +
>>>> +config PHYSICAL_START
>>>> + hex "Physical address where the kernel is loaded"
>>>> + default "0x9000000091000000" if 64BIT
>>>> + depends on CRASH_DUMP
>>>> + help
>>>> + This gives the XKPRANGE address where the kernel is loaded.
>>>> + If you plan to use kernel for capturing the crash dump change
>>>> + this value to start of the reserved region (the "X" value as
>>>> + specified in the "crashkernel=YM@XM" command line boot parameter
>>>> + passed to the panic-ed kernel).
>>>> +
>>>> config SECCOMP
>>>> bool "Enable seccomp to safely compute untrusted bytecode"
>>>> depends on PROC_FS
>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>> --- a/arch/loongarch/Makefile
>>>> +++ b/arch/loongarch/Makefile
>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>> cflags-y += -ffreestanding
>>>> cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>
>>>> +ifdef CONFIG_PHYSICAL_START
>>>> +load-y = $(CONFIG_PHYSICAL_START)
>>>> +else
>>>> load-y = 0x9000000000200000
>>>> +endif
>>>> bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>
>>>> drivers-$(CONFIG_PCI) += arch/loongarch/pci/
>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>> index 20b64ac3f128..df5aea129364 100644
>>>> --- a/arch/loongarch/kernel/Makefile
>>>> +++ b/arch/loongarch/kernel/Makefile
>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
>>>> obj-$(CONFIG_MODULES) += module.o module-sections.o
>>>> obj-$(CONFIG_STACKTRACE) += stacktrace.o
>>>>
>>>> -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
>>>> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
>>>> +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
>>>>
>>>> obj-$(CONFIG_PROC_FS) += proc.o
>>>>
>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>> new file mode 100644
>>>> index 000000000000..b4f249ec6301
>>>> --- /dev/null
>>>> +++ b/arch/loongarch/kernel/crash.c
>>>> @@ -0,0 +1,100 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/*
>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>> + *
>>>> + * Derived from MIPS
>>>> + */
>>>> +#include <linux/kernel.h>
>>>> +#include <linux/smp.h>
>>>> +#include <linux/reboot.h>
>>>> +#include <linux/crash_dump.h>
>>>> +#include <linux/delay.h>
>>>> +#include <linux/irq.h>
>>>> +#include <linux/types.h>
>>>> +#include <linux/sched.h>
>>>> +#include <linux/sched/task_stack.h>
>>>> +#include <asm/cacheflush.h>
>>>> +#include <asm/kexec.h>
>>>> +
>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>> +
>>>> +#ifdef CONFIG_SMP
>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>> +{
>>>> + struct pt_regs *regs = passed_regs;
>>>> + int cpu = smp_processor_id();
>>>> +
>>>> + /*
>>>> + * If we are passed registers, use those. Otherwise get the
>>>> + * regs from the last interrupt, which should be correct, as
>>>> + * we are in an interrupt. But if the regs are not there,
>>>> + * pull them from the top of the stack. They are probably
>>>> + * wrong, but we need something to keep from crashing again.
>>>> + */
>>>> + if (!regs)
>>>> + regs = get_irq_regs();
>>>> + if (!regs)
>>>> + regs = task_pt_regs(current);
>>>> +
>>>> + local_irq_disable();
>>>> + if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>> + crash_save_cpu(regs, cpu);
>>>> + cpumask_set_cpu(cpu, &cpus_in_crash);
>>>> +
>>>> + while (!atomic_read(&kexec_ready_to_reboot))
>>>> + cpu_relax();
>>>> +
>>>> + kexec_reboot();
>>>> +}
>>>> +
>>>> +/* Override the weak function in kernel/panic.c */
>>>> +void crash_smp_send_stop(void)
>>>> +{
>>>> + static int cpus_stopped;
>>>> + unsigned long timeout;
>>>> + unsigned int ncpus;
>>>> +
>>>> + /*
>>>> + * This function can be called twice in panic path, but obviously
>>>> + * we execute this only once.
>>>> + */
>>>> + if (cpus_stopped)
>>>> + return;
>>>> +
>>>> + cpus_stopped = 1;
>>>> +
>>>> + /* Excluding the panic cpu */
>>>> + ncpus = num_online_cpus() - 1;
>>>> +
>>>> + smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>> + smp_wmb();
>>>> +
>>>> + /*
>>>> + * The crash CPU sends an IPI and wait for other CPUs to
>>>> + * respond. Delay of at least 10 seconds.
>>>> + */
>>>> + pr_emerg("Sending IPI to other cpus...\n");
>>>> + timeout = USEC_PER_SEC * 10;
>>>> + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>> + cpu_relax();
>>>> + udelay(1);
>>>> + }
>>>> +}
>>>> +
>>>> +#endif
>>>> +
>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>> +{
>>>> + int crashing_cpu;
>>>> +
>>>> + local_irq_disable();
>>>> +
>>>> + crashing_cpu = smp_processor_id();
>>>> + crash_save_cpu(regs, crashing_cpu);
>>>> +
>>>> + /* shutdown non-crashing cpus */
>>>> + crash_smp_send_stop();
>>>> + cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>> +
>>>> + pr_info("Starting crashdump kernel...\n");
>>>> +}
>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>> new file mode 100644
>>>> index 000000000000..13e5d2f7870d
>>>> --- /dev/null
>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>> @@ -0,0 +1,19 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +#include <linux/highmem.h>
>>>> +#include <linux/crash_dump.h>
>>>> +#include <linux/io.h>
>>>> +
>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>> + size_t csize, unsigned long offset)
>>>> +{
>>>> + void *vaddr;
>>>> +
>>>> + if (!csize)
>>>> + return 0;
>>>> +
>>>> + vaddr = kmap_local_pfn(pfn);
>>>> + csize = copy_to_iter(vaddr + offset, csize, iter);
>>>> + kunmap_local(vaddr);
>>>> +
>>>> + return csize;
>>>> +}
>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>> continue;
>>>> }
>>>>
>>>> - /* kexec need a safe page to save reboot_code_buffer */
>>>> + /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>> kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>
>>>> reboot_code_buffer =
>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>
>>>> kexec_reboot();
>>>> }
>>>> -
>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>> -{
>>>> -}
>>>> #endif
>>>>
>>>> void machine_shutdown(void)
>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>
>>>> jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>
>>>> - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>> + if (image->type == KEXEC_TYPE_DEFAULT)
>>>> + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>
>>>> /*
>>>> * The generic kexec code builds a page list with physical
>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>
>>>> /*
>>>> * We know we were online, and there will be no incoming IPIs at
>>>> - * this point.
>>>> + * this point. Mark online again before rebooting so that the crash
>>>> + * analysis tool will see us correctly.
>>>> */
>>>> set_cpu_online(smp_processor_id(), true);
>>>>
>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>> --- a/arch/loongarch/kernel/mem.c
>>>> +++ b/arch/loongarch/kernel/mem.c
>>>> @@ -5,6 +5,7 @@
>>>> #include <linux/efi.h>
>>>> #include <linux/initrd.h>
>>>> #include <linux/memblock.h>
>>>> +#include <linux/of_fdt.h>
>>>>
>>>> #include <asm/bootinfo.h>
>>>> #include <asm/loongson.h>
>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>
>>>> /* Reserve the initrd */
>>>> reserve_initrd_mem();
>>>> +
>>>> + /* Mainly reserved memory for the elf core head */
>>>> + early_init_fdt_scan_reserved_mem();
>>>> + /* Parse linux,usable-memory-range is for crash dump kernel */
>>>> + early_init_dt_check_for_usable_mem_range();
>>>> }
>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>> move s2, a2
>>>> move s3, a3
>>>>
>>>> + /*
>>>> + * In case of a kdump/crash kernel, the indirection page is not
>>>> + * populated as the kernel is directly copied to a reserved location
>>>> + */
>>>> + beqz s2, done
>>>> +
>>>> process_entry:
>>>> PTR_L s4, s2, 0
>>>> PTR_ADDI s2, s2, SZREG
>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>> index f938aae3e92c..ea34b77e402f 100644
>>>> --- a/arch/loongarch/kernel/setup.c
>>>> +++ b/arch/loongarch/kernel/setup.c
>>>> @@ -19,6 +19,8 @@
>>>> #include <linux/memblock.h>
>>>> #include <linux/initrd.h>
>>>> #include <linux/ioport.h>
>>>> +#include <linux/kexec.h>
>>>> +#include <linux/crash_dump.h>
>>>> #include <linux/root_dev.h>
>>>> #include <linux/console.h>
>>>> #include <linux/pfn.h>
>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>> }
>>>> early_param("mem", early_parse_mem);
>>>>
>>>> +static void __init loongarch_parse_crashkernel(void)
>>>> +{
>>>> +#ifdef CONFIG_KEXEC
>>>> + unsigned long long start;
>>>> + unsigned long long total_mem;
>>>> + unsigned long long crash_size, crash_base;
>>>> + int ret;
>>>> +
>>>> + total_mem = memblock_phys_mem_size();
>>>> + ret = parse_crashkernel(boot_command_line, total_mem,
>>>> + &crash_size, &crash_base);
>>>> + if (ret != 0 || crash_size <= 0)
>>>> + return;
>>>> +
>>>> +
>>>> + start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>> + crash_base + crash_size);
>>>> + if (start != crash_base) {
>>>> + pr_warn("Invalid memory region reserved for crash kernel\n");
>>>> + return;
>>>> + }
>>>> +
>>>> + crashk_res.start = crash_base;
>>>> + crashk_res.end = crash_base + crash_size - 1;
>>>> +#endif
>>>> +}
>>>> +
>>>> +static void __init request_crashkernel(struct resource *res)
>>>> +{
>>>> +#ifdef CONFIG_KEXEC
>>>> + int ret;
>>>> +
>>>> + if (crashk_res.start == crashk_res.end)
>>>> + return;
>>>> +
>>>> + ret = request_resource(res, &crashk_res);
>>>> + if (!ret)
>>>> + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>> + (unsigned long)((crashk_res.end -
>>>> + crashk_res.start + 1) >> 20),
>>>> + (unsigned long)(crashk_res.start >> 20));
>>>> +#endif
>>>> +}
>>>> +
>>>> void __init platform_init(void)
>>>> {
>>>> efi_init();
>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>
>>>> check_kernel_sections_mem();
>>>>
>>>> + loongarch_parse_crashkernel();
>>>> +
>>>> /*
>>>> * In order to reduce the possibility of kernel panic when failed to
>>>> * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>> request_resource(res, &code_resource);
>>>> request_resource(res, &data_resource);
>>>> request_resource(res, &bss_resource);
>>>> + request_crashkernel(res);
>>>> }
>>>> }
>>>>
>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>> --- a/arch/loongarch/kernel/traps.c
>>>> +++ b/arch/loongarch/kernel/traps.c
>>>> @@ -10,6 +10,7 @@
>>>> #include <linux/entry-common.h>
>>>> #include <linux/init.h>
>>>> #include <linux/kernel.h>
>>>> +#include <linux/kexec.h>
>>>> #include <linux/module.h>
>>>> #include <linux/extable.h>
>>>> #include <linux/mm.h>
>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>
>>>> oops_exit();
>>>>
>>>> + if (regs && kexec_should_crash(current))
>>>> + crash_kexec(regs);
>>>> +
>>>> if (in_interrupt())
>>>> panic("Fatal exception in interrupt");
>>>>
>>>> --
>>>> 2.36.0
>>>>
>>
On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Huacai
>
> On 09/05/2022 09:38 AM, Huacai Chen wrote:
> > Hi, Youling,
> >
> > On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >> Hi, Huacai
> >>
> >> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> >>> Hi, Youling,
> >>>
> >>> I think crash.c can be merged into crash_dump.c
> >>
> >> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> >> I'm not sure if merging crash.c into crash_dump.c will break its
> >> consistency?
> >>
> >> Thanks,
> >> Youling
> > Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> > can be merged into machine_kexec.c, as arm64 and riscv do.
>
> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
> into machine_kexec.c, should crash_shutdown_secondary and
> crash_smp_send_stop be placed in smp.c?
I don't want to touch smp.c; merging everything into machine_kexec.c seems reasonable.
Huacai
>
> Youling.
> >
> > Huacai
> >>
> >>>
> >>> Huacai
> >>>
> >>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>
> >>>> This patch adds support for kdump, the kernel will reserve a region
> >>>> for the crash kernel and jump there on panic.
> >>>>
> >>>> Arch-specific functions are added to allow for implementing a crash
> >>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
> >>>>
> >>>> A user space tool, like kexec-tools, is responsible for allocating a
> >>>> separate region for the core's ELF header within crash kdump kernel
> >>>> memory and filling it in when executing kexec_load().
> >>>>
> >>>> Then, its location will be advertised to crash dump kernel via a new
> >>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>>>
> >>>> At the same time, it will also limit the crash kdump kernel to the
> >>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
> >>>> so as not to destroy the original kernel dump data.
> >>>>
> >>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >>>> with copy_oldmem_page().
> >>>>
> >>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> >>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
> >>>> it by triggering a crash through /proc/sysrq-trigger:
> >>>>
> >>>> $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>>> # echo c > /proc/sysrq-trigger
> >>>>
> >>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >>>> ---
> >>>> arch/loongarch/Kconfig | 22 ++++++
> >>>> arch/loongarch/Makefile | 4 +
> >>>> arch/loongarch/kernel/Makefile | 3 +-
> >>>> arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
> >>>> arch/loongarch/kernel/crash_dump.c | 19 +++++
> >>>> arch/loongarch/kernel/machine_kexec.c | 12 ++-
> >>>> arch/loongarch/kernel/mem.c | 6 ++
> >>>> arch/loongarch/kernel/relocate_kernel.S | 6 ++
> >>>> arch/loongarch/kernel/setup.c | 49 ++++++++++++
> >>>> arch/loongarch/kernel/traps.c | 4 +
> >>>> 10 files changed, 217 insertions(+), 8 deletions(-)
> >>>> create mode 100644 arch/loongarch/kernel/crash.c
> >>>> create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>>>
> >>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >>>> index 903c82fa958d..7c1b07a5b5bd 100644
> >>>> --- a/arch/loongarch/Kconfig
> >>>> +++ b/arch/loongarch/Kconfig
> >>>> @@ -420,6 +420,28 @@ config KEXEC
> >>>>
> >>>> The name comes from the similarity to the exec system call.
> >>>>
> >>>> +config CRASH_DUMP
> >>>> + bool "Build kdump crash kernel"
> >>>> + help
> >>>> + Generate crash dump after being started by kexec. This should
> >>>> + be normally only set in special crash dump kernels which are
> >>>> + loaded in the main kernel with kexec-tools into a specially
> >>>> + reserved region and then later executed after a crash by
> >>>> + kdump/kexec.
> >>>> +
> >>>> + For more details see Documentation/admin-guide/kdump/kdump.rst
> >>>> +
> >>>> +config PHYSICAL_START
> >>>> + hex "Physical address where the kernel is loaded"
> >>>> + default "0x9000000091000000" if 64BIT
> >>>> + depends on CRASH_DUMP
> >>>> + help
> >>>> + This gives the XKPRANGE address where the kernel is loaded.
> >>>> + If you plan to use kernel for capturing the crash dump change
> >>>> + this value to start of the reserved region (the "X" value as
> >>>> + specified in the "crashkernel=YM@XM" command line boot parameter
> >>>> + passed to the panic-ed kernel).
> >>>> +
> >>>> config SECCOMP
> >>>> bool "Enable seccomp to safely compute untrusted bytecode"
> >>>> depends on PROC_FS
> >>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >>>> index 4bc47f47cfd8..7dabd580426d 100644
> >>>> --- a/arch/loongarch/Makefile
> >>>> +++ b/arch/loongarch/Makefile
> >>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>>> cflags-y += -ffreestanding
> >>>> cflags-y += $(call cc-option, -mno-check-zero-division)
> >>>>
> >>>> +ifdef CONFIG_PHYSICAL_START
> >>>> +load-y = $(CONFIG_PHYSICAL_START)
> >>>> +else
> >>>> load-y = 0x9000000000200000
> >>>> +endif
> >>>> bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>>>
> >>>> drivers-$(CONFIG_PCI) += arch/loongarch/pci/
> >>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >>>> index 20b64ac3f128..df5aea129364 100644
> >>>> --- a/arch/loongarch/kernel/Makefile
> >>>> +++ b/arch/loongarch/kernel/Makefile
> >>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
> >>>> obj-$(CONFIG_MODULES) += module.o module-sections.o
> >>>> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> >>>>
> >>>> -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
> >>>> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
> >>>> +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
> >>>>
> >>>> obj-$(CONFIG_PROC_FS) += proc.o
> >>>>
> >>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >>>> new file mode 100644
> >>>> index 000000000000..b4f249ec6301
> >>>> --- /dev/null
> >>>> +++ b/arch/loongarch/kernel/crash.c
> >>>> @@ -0,0 +1,100 @@
> >>>> +// SPDX-License-Identifier: GPL-2.0
> >>>> +/*
> >>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >>>> + *
> >>>> + * Derived from MIPS
> >>>> + */
> >>>> +#include <linux/kernel.h>
> >>>> +#include <linux/smp.h>
> >>>> +#include <linux/reboot.h>
> >>>> +#include <linux/crash_dump.h>
> >>>> +#include <linux/delay.h>
> >>>> +#include <linux/irq.h>
> >>>> +#include <linux/types.h>
> >>>> +#include <linux/sched.h>
> >>>> +#include <linux/sched/task_stack.h>
> >>>> +#include <asm/cacheflush.h>
> >>>> +#include <asm/kexec.h>
> >>>> +
> >>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >>>> +
> >>>> +#ifdef CONFIG_SMP
> >>>> +static void crash_shutdown_secondary(void *passed_regs)
> >>>> +{
> >>>> + struct pt_regs *regs = passed_regs;
> >>>> + int cpu = smp_processor_id();
> >>>> +
> >>>> + /*
> >>>> + * If we are passed registers, use those. Otherwise get the
> >>>> + * regs from the last interrupt, which should be correct, as
> >>>> + * we are in an interrupt. But if the regs are not there,
> >>>> + * pull them from the top of the stack. They are probably
> >>>> + * wrong, but we need something to keep from crashing again.
> >>>> + */
> >>>> + if (!regs)
> >>>> + regs = get_irq_regs();
> >>>> + if (!regs)
> >>>> + regs = task_pt_regs(current);
> >>>> +
> >>>> + local_irq_disable();
> >>>> + if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >>>> + crash_save_cpu(regs, cpu);
> >>>> + cpumask_set_cpu(cpu, &cpus_in_crash);
> >>>> +
> >>>> + while (!atomic_read(&kexec_ready_to_reboot))
> >>>> + cpu_relax();
> >>>> +
> >>>> + kexec_reboot();
> >>>> +}
> >>>> +
> >>>> +/* Override the weak function in kernel/panic.c */
> >>>> +void crash_smp_send_stop(void)
> >>>> +{
> >>>> + static int cpus_stopped;
> >>>> + unsigned long timeout;
> >>>> + unsigned int ncpus;
> >>>> +
> >>>> + /*
> >>>> + * This function can be called twice in panic path, but obviously
> >>>> + * we execute this only once.
> >>>> + */
> >>>> + if (cpus_stopped)
> >>>> + return;
> >>>> +
> >>>> + cpus_stopped = 1;
> >>>> +
> >>>> + /* Excluding the panic cpu */
> >>>> + ncpus = num_online_cpus() - 1;
> >>>> +
> >>>> + smp_call_function(crash_shutdown_secondary, NULL, 0);
> >>>> + smp_wmb();
> >>>> +
> >>>> + /*
> >>>> + * The crash CPU sends an IPI and wait for other CPUs to
> >>>> + * respond. Delay of at least 10 seconds.
> >>>> + */
> >>>> + pr_emerg("Sending IPI to other cpus...\n");
> >>>> + timeout = USEC_PER_SEC * 10;
> >>>> + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >>>> + cpu_relax();
> >>>> + udelay(1);
> >>>> + }
> >>>> +}
> >>>> +
> >>>> +#endif
> >>>> +
> >>>> +void machine_crash_shutdown(struct pt_regs *regs)
> >>>> +{
> >>>> + int crashing_cpu;
> >>>> +
> >>>> + local_irq_disable();
> >>>> +
> >>>> + crashing_cpu = smp_processor_id();
> >>>> + crash_save_cpu(regs, crashing_cpu);
> >>>> +
> >>>> + /* shutdown non-crashing cpus */
> >>>> + crash_smp_send_stop();
> >>>> + cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >>>> +
> >>>> + pr_info("Starting crashdump kernel...\n");
> >>>> +}
> >>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >>>> new file mode 100644
> >>>> index 000000000000..13e5d2f7870d
> >>>> --- /dev/null
> >>>> +++ b/arch/loongarch/kernel/crash_dump.c
> >>>> @@ -0,0 +1,19 @@
> >>>> +// SPDX-License-Identifier: GPL-2.0
> >>>> +#include <linux/highmem.h>
> >>>> +#include <linux/crash_dump.h>
> >>>> +#include <linux/io.h>
> >>>> +
> >>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >>>> + size_t csize, unsigned long offset)
> >>>> +{
> >>>> + void *vaddr;
> >>>> +
> >>>> + if (!csize)
> >>>> + return 0;
> >>>> +
> >>>> + vaddr = kmap_local_pfn(pfn);
> >>>> + csize = copy_to_iter(vaddr + offset, csize, iter);
> >>>> + kunmap_local(vaddr);
> >>>> +
> >>>> + return csize;
> >>>> +}
> >>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >>>> --- a/arch/loongarch/kernel/machine_kexec.c
> >>>> +++ b/arch/loongarch/kernel/machine_kexec.c
> >>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>>> continue;
> >>>> }
> >>>>
> >>>> - /* kexec need a safe page to save reboot_code_buffer */
> >>>> + /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>>> kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>>>
> >>>> reboot_code_buffer =
> >>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>>>
> >>>> kexec_reboot();
> >>>> }
> >>>> -
> >>>> -void machine_crash_shutdown(struct pt_regs *regs)
> >>>> -{
> >>>> -}
> >>>> #endif
> >>>>
> >>>> void machine_shutdown(void)
> >>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>>>
> >>>> jump_addr = (unsigned long)phys_to_virt(image->start);
> >>>>
> >>>> - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>> + if (image->type == KEXEC_TYPE_DEFAULT)
> >>>> + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>
> >>>> /*
> >>>> * The generic kexec code builds a page list with physical
> >>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>>>
> >>>> /*
> >>>> * We know we were online, and there will be no incoming IPIs at
> >>>> - * this point.
> >>>> + * this point. Mark online again before rebooting so that the crash
> >>>> + * analysis tool will see us correctly.
> >>>> */
> >>>> set_cpu_online(smp_processor_id(), true);
> >>>>
> >>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >>>> index 7423361b0ebc..c6def6ff81c8 100644
> >>>> --- a/arch/loongarch/kernel/mem.c
> >>>> +++ b/arch/loongarch/kernel/mem.c
> >>>> @@ -5,6 +5,7 @@
> >>>> #include <linux/efi.h>
> >>>> #include <linux/initrd.h>
> >>>> #include <linux/memblock.h>
> >>>> +#include <linux/of_fdt.h>
> >>>>
> >>>> #include <asm/bootinfo.h>
> >>>> #include <asm/loongson.h>
> >>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>>>
> >>>> /* Reserve the initrd */
> >>>> reserve_initrd_mem();
> >>>> +
> >>>> + /* Mainly reserved memory for the elf core head */
> >>>> + early_init_fdt_scan_reserved_mem();
> >>>> + /* Parse linux,usable-memory-range is for crash dump kernel */
> >>>> + early_init_dt_check_for_usable_mem_range();
> >>>> }
> >>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >>>> --- a/arch/loongarch/kernel/relocate_kernel.S
> >>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>>> move s2, a2
> >>>> move s3, a3
> >>>>
> >>>> + /*
> >>>> + * In case of a kdump/crash kernel, the indirection page is not
> >>>> + * populated as the kernel is directly copied to a reserved location
> >>>> + */
> >>>> + beqz s2, done
> >>>> +
> >>>> process_entry:
> >>>> PTR_L s4, s2, 0
> >>>> PTR_ADDI s2, s2, SZREG
> >>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >>>> index f938aae3e92c..ea34b77e402f 100644
> >>>> --- a/arch/loongarch/kernel/setup.c
> >>>> +++ b/arch/loongarch/kernel/setup.c
> >>>> @@ -19,6 +19,8 @@
> >>>> #include <linux/memblock.h>
> >>>> #include <linux/initrd.h>
> >>>> #include <linux/ioport.h>
> >>>> +#include <linux/kexec.h>
> >>>> +#include <linux/crash_dump.h>
> >>>> #include <linux/root_dev.h>
> >>>> #include <linux/console.h>
> >>>> #include <linux/pfn.h>
> >>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>>> }
> >>>> early_param("mem", early_parse_mem);
> >>>>
> >>>> +static void __init loongarch_parse_crashkernel(void)
> >>>> +{
> >>>> +#ifdef CONFIG_KEXEC
> >>>> + unsigned long long start;
> >>>> + unsigned long long total_mem;
> >>>> + unsigned long long crash_size, crash_base;
> >>>> + int ret;
> >>>> +
> >>>> + total_mem = memblock_phys_mem_size();
> >>>> + ret = parse_crashkernel(boot_command_line, total_mem,
> >>>> + &crash_size, &crash_base);
> >>>> + if (ret != 0 || crash_size <= 0)
> >>>> + return;
> >>>> +
> >>>> +
> >>>> + start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >>>> + crash_base + crash_size);
> >>>> + if (start != crash_base) {
> >>>> + pr_warn("Invalid memory region reserved for crash kernel\n");
> >>>> + return;
> >>>> + }
> >>>> +
> >>>> + crashk_res.start = crash_base;
> >>>> + crashk_res.end = crash_base + crash_size - 1;
> >>>> +#endif
> >>>> +}
> >>>> +
> >>>> +static void __init request_crashkernel(struct resource *res)
> >>>> +{
> >>>> +#ifdef CONFIG_KEXEC
> >>>> + int ret;
> >>>> +
> >>>> + if (crashk_res.start == crashk_res.end)
> >>>> + return;
> >>>> +
> >>>> + ret = request_resource(res, &crashk_res);
> >>>> + if (!ret)
> >>>> + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >>>> + (unsigned long)((crashk_res.end -
> >>>> + crashk_res.start + 1) >> 20),
> >>>> + (unsigned long)(crashk_res.start >> 20));
> >>>> +#endif
> >>>> +}
> >>>> +
> >>>> void __init platform_init(void)
> >>>> {
> >>>> efi_init();
> >>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>>>
> >>>> check_kernel_sections_mem();
> >>>>
> >>>> + loongarch_parse_crashkernel();
> >>>> +
> >>>> /*
> >>>> * In order to reduce the possibility of kernel panic when failed to
> >>>> * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>>> request_resource(res, &code_resource);
> >>>> request_resource(res, &data_resource);
> >>>> request_resource(res, &bss_resource);
> >>>> + request_crashkernel(res);
> >>>> }
> >>>> }
> >>>>
> >>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >>>> index aa1c95aaf595..0e610872f3f4 100644
> >>>> --- a/arch/loongarch/kernel/traps.c
> >>>> +++ b/arch/loongarch/kernel/traps.c
> >>>> @@ -10,6 +10,7 @@
> >>>> #include <linux/entry-common.h>
> >>>> #include <linux/init.h>
> >>>> #include <linux/kernel.h>
> >>>> +#include <linux/kexec.h>
> >>>> #include <linux/module.h>
> >>>> #include <linux/extable.h>
> >>>> #include <linux/mm.h>
> >>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>>>
> >>>> oops_exit();
> >>>>
> >>>> + if (regs && kexec_should_crash(current))
> >>>> + crash_kexec(regs);
> >>>> +
> >>>> if (in_interrupt())
> >>>> panic("Fatal exception in interrupt");
> >>>>
> >>>> --
> >>>> 2.36.0
> >>>>
> >>
>
On 09/05/2022 10:14 AM, Huacai Chen wrote:
> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> Hi, Huacai
>>
>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
>>> Hi, Youling,
>>>
>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>> Hi, Huacai
>>>>
>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>>>> Hi, Youling,
>>>>>
>>>>> I think crash.c can be merged into crash_dump.c
>>>>
>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>>>> I'm not sure if merging crash.c into crash_dump.c will break its
>>>> consistency?
>>>>
>>>> Thanks,
>>>> Youling
>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
>>> can be merged into machine_kexec.c, as arm64 and riscv do.
>>
>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
>> into machine_kexec.c, should crash_shutdown_secondary and
>> crash_smp_send_stop be placed in smp.c?
> I don't want to touch smp.c; merging everything into machine_kexec.c seems reasonable.
Ok, I'll merge all into machine_kexec.c.
Youling.
>
> Huacai
>>
>> Youling.
>>>
>>> Huacai
>>>>
>>>>>
>>>>> Huacai
>>>>>
>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>
>>>>>> This patch adds support for kdump, the kernel will reserve a region
>>>>>> for the crash kernel and jump there on panic.
>>>>>>
>>>>>> Arch-specific functions are added to allow for implementing a crash
>>>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>>>>>>
>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>>>> separate region for the core's ELF header within crash kdump kernel
>>>>>> memory and filling it in when executing kexec_load().
>>>>>>
>>>>>> Then, its location will be advertised to crash dump kernel via a new
>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>>>
>>>>>> At the same time, it will also limit the crash kdump kernel to the
>>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>>>>>> so as not to destroy the original kernel dump data.
>>>>>>
>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>>>> with copy_oldmem_page().
>>>>>>
>>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
>>>>>> it by triggering a crash through /proc/sysrq-trigger:
>>>>>>
>>>>>> $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>>> # echo c > /proc/sysrq-trigger
>>>>>>
>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>>>> ---
>>>>>> arch/loongarch/Kconfig | 22 ++++++
>>>>>> arch/loongarch/Makefile | 4 +
>>>>>> arch/loongarch/kernel/Makefile | 3 +-
>>>>>> arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
>>>>>> arch/loongarch/kernel/crash_dump.c | 19 +++++
>>>>>> arch/loongarch/kernel/machine_kexec.c | 12 ++-
>>>>>> arch/loongarch/kernel/mem.c | 6 ++
>>>>>> arch/loongarch/kernel/relocate_kernel.S | 6 ++
>>>>>> arch/loongarch/kernel/setup.c | 49 ++++++++++++
>>>>>> arch/loongarch/kernel/traps.c | 4 +
>>>>>> 10 files changed, 217 insertions(+), 8 deletions(-)
>>>>>> create mode 100644 arch/loongarch/kernel/crash.c
>>>>>> create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>>>
>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>>>> --- a/arch/loongarch/Kconfig
>>>>>> +++ b/arch/loongarch/Kconfig
>>>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>>>
>>>>>> The name comes from the similarity to the exec system call.
>>>>>>
>>>>>> +config CRASH_DUMP
>>>>>> + bool "Build kdump crash kernel"
>>>>>> + help
>>>>>> + Generate crash dump after being started by kexec. This should
>>>>>> + be normally only set in special crash dump kernels which are
>>>>>> + loaded in the main kernel with kexec-tools into a specially
>>>>>> + reserved region and then later executed after a crash by
>>>>>> + kdump/kexec.
>>>>>> +
>>>>>> + For more details see Documentation/admin-guide/kdump/kdump.rst
>>>>>> +
>>>>>> +config PHYSICAL_START
>>>>>> + hex "Physical address where the kernel is loaded"
>>>>>> + default "0x9000000091000000" if 64BIT
>>>>>> + depends on CRASH_DUMP
>>>>>> + help
>>>>>> + This gives the XKPRANGE address where the kernel is loaded.
>>>>>> + If you plan to use kernel for capturing the crash dump change
>>>>>> + this value to start of the reserved region (the "X" value as
>>>>>> + specified in the "crashkernel=YM@XM" command line boot parameter
>>>>>> + passed to the panic-ed kernel).
>>>>>> +
>>>>>> config SECCOMP
>>>>>> bool "Enable seccomp to safely compute untrusted bytecode"
>>>>>> depends on PROC_FS
>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>>>> --- a/arch/loongarch/Makefile
>>>>>> +++ b/arch/loongarch/Makefile
>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>>> cflags-y += -ffreestanding
>>>>>> cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>>>
>>>>>> +ifdef CONFIG_PHYSICAL_START
>>>>>> +load-y = $(CONFIG_PHYSICAL_START)
>>>>>> +else
>>>>>> load-y = 0x9000000000200000
>>>>>> +endif
>>>>>> bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>>>
>>>>>> drivers-$(CONFIG_PCI) += arch/loongarch/pci/
>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>>>> index 20b64ac3f128..df5aea129364 100644
>>>>>> --- a/arch/loongarch/kernel/Makefile
>>>>>> +++ b/arch/loongarch/kernel/Makefile
>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
>>>>>> obj-$(CONFIG_MODULES) += module.o module-sections.o
>>>>>> obj-$(CONFIG_STACKTRACE) += stacktrace.o
>>>>>>
>>>>>> -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
>>>>>> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
>>>>>> +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
>>>>>>
>>>>>> obj-$(CONFIG_PROC_FS) += proc.o
>>>>>>
>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..b4f249ec6301
>>>>>> --- /dev/null
>>>>>> +++ b/arch/loongarch/kernel/crash.c
>>>>>> @@ -0,0 +1,100 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>> +/*
>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>>>> + *
>>>>>> + * Derived from MIPS
>>>>>> + */
>>>>>> +#include <linux/kernel.h>
>>>>>> +#include <linux/smp.h>
>>>>>> +#include <linux/reboot.h>
>>>>>> +#include <linux/crash_dump.h>
>>>>>> +#include <linux/delay.h>
>>>>>> +#include <linux/irq.h>
>>>>>> +#include <linux/types.h>
>>>>>> +#include <linux/sched.h>
>>>>>> +#include <linux/sched/task_stack.h>
>>>>>> +#include <asm/cacheflush.h>
>>>>>> +#include <asm/kexec.h>
>>>>>> +
>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>>>> +
>>>>>> +#ifdef CONFIG_SMP
>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>>>> +{
>>>>>> + struct pt_regs *regs = passed_regs;
>>>>>> + int cpu = smp_processor_id();
>>>>>> +
>>>>>> + /*
>>>>>> + * If we are passed registers, use those. Otherwise get the
>>>>>> + * regs from the last interrupt, which should be correct, as
>>>>>> + * we are in an interrupt. But if the regs are not there,
>>>>>> + * pull them from the top of the stack. They are probably
>>>>>> + * wrong, but we need something to keep from crashing again.
>>>>>> + */
>>>>>> + if (!regs)
>>>>>> + regs = get_irq_regs();
>>>>>> + if (!regs)
>>>>>> + regs = task_pt_regs(current);
>>>>>> +
>>>>>> + local_irq_disable();
>>>>>> + if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>>>> + crash_save_cpu(regs, cpu);
>>>>>> + cpumask_set_cpu(cpu, &cpus_in_crash);
>>>>>> +
>>>>>> + while (!atomic_read(&kexec_ready_to_reboot))
>>>>>> + cpu_relax();
>>>>>> +
>>>>>> + kexec_reboot();
>>>>>> +}
>>>>>> +
>>>>>> +/* Override the weak function in kernel/panic.c */
>>>>>> +void crash_smp_send_stop(void)
>>>>>> +{
>>>>>> + static int cpus_stopped;
>>>>>> + unsigned long timeout;
>>>>>> + unsigned int ncpus;
>>>>>> +
>>>>>> + /*
>>>>>> + * This function can be called twice in panic path, but obviously
>>>>>> + * we execute this only once.
>>>>>> + */
>>>>>> + if (cpus_stopped)
>>>>>> + return;
>>>>>> +
>>>>>> + cpus_stopped = 1;
>>>>>> +
>>>>>> + /* Excluding the panic cpu */
>>>>>> + ncpus = num_online_cpus() - 1;
>>>>>> +
>>>>>> + smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>>>> + smp_wmb();
>>>>>> +
>>>>>> + /*
>>>>>> + * The crash CPU sends an IPI and wait for other CPUs to
>>>>>> + * respond. Delay of at least 10 seconds.
>>>>>> + */
>>>>>> + pr_emerg("Sending IPI to other cpus...\n");
>>>>>> + timeout = USEC_PER_SEC * 10;
>>>>>> + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>>>> + cpu_relax();
>>>>>> + udelay(1);
>>>>>> + }
>>>>>> +}
>>>>>> +
>>>>>> +#endif
>>>>>> +
>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>>>> +{
>>>>>> + int crashing_cpu;
>>>>>> +
>>>>>> + local_irq_disable();
>>>>>> +
>>>>>> + crashing_cpu = smp_processor_id();
>>>>>> + crash_save_cpu(regs, crashing_cpu);
>>>>>> +
>>>>>> + /* shutdown non-crashing cpus */
>>>>>> + crash_smp_send_stop();
>>>>>> + cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>>>> +
>>>>>> + pr_info("Starting crashdump kernel...\n");
>>>>>> +}
>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..13e5d2f7870d
>>>>>> --- /dev/null
>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>>>> @@ -0,0 +1,19 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>> +#include <linux/highmem.h>
>>>>>> +#include <linux/crash_dump.h>
>>>>>> +#include <linux/io.h>
>>>>>> +
>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>>>> + size_t csize, unsigned long offset)
>>>>>> +{
>>>>>> + void *vaddr;
>>>>>> +
>>>>>> + if (!csize)
>>>>>> + return 0;
>>>>>> +
>>>>>> + vaddr = kmap_local_pfn(pfn);
>>>>>> + csize = copy_to_iter(vaddr + offset, csize, iter);
>>>>>> + kunmap_local(vaddr);
>>>>>> +
>>>>>> + return csize;
>>>>>> +}
>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>>> continue;
>>>>>> }
>>>>>>
>>>>>> - /* kexec need a safe page to save reboot_code_buffer */
>>>>>> + /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>>> kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>>>
>>>>>> reboot_code_buffer =
>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>>>
>>>>>> kexec_reboot();
>>>>>> }
>>>>>> -
>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>>>> -{
>>>>>> -}
>>>>>> #endif
>>>>>>
>>>>>> void machine_shutdown(void)
>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>>>
>>>>>> jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>>>
>>>>>> - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>> + if (image->type == KEXEC_TYPE_DEFAULT)
>>>>>> + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>
>>>>>> /*
>>>>>> * The generic kexec code builds a page list with physical
>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>>>
>>>>>> /*
>>>>>> * We know we were online, and there will be no incoming IPIs at
>>>>>> - * this point.
>>>>>> + * this point. Mark online again before rebooting so that the crash
>>>>>> + * analysis tool will see us correctly.
>>>>>> */
>>>>>> set_cpu_online(smp_processor_id(), true);
>>>>>>
>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>>>> --- a/arch/loongarch/kernel/mem.c
>>>>>> +++ b/arch/loongarch/kernel/mem.c
>>>>>> @@ -5,6 +5,7 @@
>>>>>> #include <linux/efi.h>
>>>>>> #include <linux/initrd.h>
>>>>>> #include <linux/memblock.h>
>>>>>> +#include <linux/of_fdt.h>
>>>>>>
>>>>>> #include <asm/bootinfo.h>
>>>>>> #include <asm/loongson.h>
>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>>>
>>>>>> /* Reserve the initrd */
>>>>>> reserve_initrd_mem();
>>>>>> +
>>>>>> + /* Mainly reserved memory for the elf core head */
>>>>>> + early_init_fdt_scan_reserved_mem();
>>>>>> + /* Parse linux,usable-memory-range is for crash dump kernel */
>>>>>> + early_init_dt_check_for_usable_mem_range();
>>>>>> }
>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>>> move s2, a2
>>>>>> move s3, a3
>>>>>>
>>>>>> + /*
>>>>>> + * In case of a kdump/crash kernel, the indirection page is not
>>>>>> + * populated as the kernel is directly copied to a reserved location
>>>>>> + */
>>>>>> + beqz s2, done
>>>>>> +
>>>>>> process_entry:
>>>>>> PTR_L s4, s2, 0
>>>>>> PTR_ADDI s2, s2, SZREG
>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>>>> index f938aae3e92c..ea34b77e402f 100644
>>>>>> --- a/arch/loongarch/kernel/setup.c
>>>>>> +++ b/arch/loongarch/kernel/setup.c
>>>>>> @@ -19,6 +19,8 @@
>>>>>> #include <linux/memblock.h>
>>>>>> #include <linux/initrd.h>
>>>>>> #include <linux/ioport.h>
>>>>>> +#include <linux/kexec.h>
>>>>>> +#include <linux/crash_dump.h>
>>>>>> #include <linux/root_dev.h>
>>>>>> #include <linux/console.h>
>>>>>> #include <linux/pfn.h>
>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>>> }
>>>>>> early_param("mem", early_parse_mem);
>>>>>>
>>>>>> +static void __init loongarch_parse_crashkernel(void)
>>>>>> +{
>>>>>> +#ifdef CONFIG_KEXEC
>>>>>> + unsigned long long start;
>>>>>> + unsigned long long total_mem;
>>>>>> + unsigned long long crash_size, crash_base;
>>>>>> + int ret;
>>>>>> +
>>>>>> + total_mem = memblock_phys_mem_size();
>>>>>> + ret = parse_crashkernel(boot_command_line, total_mem,
>>>>>> + &crash_size, &crash_base);
>>>>>> + if (ret != 0 || crash_size <= 0)
>>>>>> + return;
>>>>>> +
>>>>>> +
>>>>>> + start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>>>> + crash_base + crash_size);
>>>>>> + if (start != crash_base) {
>>>>>> + pr_warn("Invalid memory region reserved for crash kernel\n");
>>>>>> + return;
>>>>>> + }
>>>>>> +
>>>>>> + crashk_res.start = crash_base;
>>>>>> + crashk_res.end = crash_base + crash_size - 1;
>>>>>> +#endif
>>>>>> +}
>>>>>> +
>>>>>> +static void __init request_crashkernel(struct resource *res)
>>>>>> +{
>>>>>> +#ifdef CONFIG_KEXEC
>>>>>> + int ret;
>>>>>> +
>>>>>> + if (crashk_res.start == crashk_res.end)
>>>>>> + return;
>>>>>> +
>>>>>> + ret = request_resource(res, &crashk_res);
>>>>>> + if (!ret)
>>>>>> + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>>>> + (unsigned long)((crashk_res.end -
>>>>>> + crashk_res.start + 1) >> 20),
>>>>>> + (unsigned long)(crashk_res.start >> 20));
>>>>>> +#endif
>>>>>> +}
>>>>>> +
>>>>>> void __init platform_init(void)
>>>>>> {
>>>>>> efi_init();
>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>>>
>>>>>> check_kernel_sections_mem();
>>>>>>
>>>>>> + loongarch_parse_crashkernel();
>>>>>> +
>>>>>> /*
>>>>>> * In order to reduce the possibility of kernel panic when failed to
>>>>>> * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>>> request_resource(res, &code_resource);
>>>>>> request_resource(res, &data_resource);
>>>>>> request_resource(res, &bss_resource);
>>>>>> + request_crashkernel(res);
>>>>>> }
>>>>>> }
>>>>>>
>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>>>> --- a/arch/loongarch/kernel/traps.c
>>>>>> +++ b/arch/loongarch/kernel/traps.c
>>>>>> @@ -10,6 +10,7 @@
>>>>>> #include <linux/entry-common.h>
>>>>>> #include <linux/init.h>
>>>>>> #include <linux/kernel.h>
>>>>>> +#include <linux/kexec.h>
>>>>>> #include <linux/module.h>
>>>>>> #include <linux/extable.h>
>>>>>> #include <linux/mm.h>
>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>>>
>>>>>> oops_exit();
>>>>>>
>>>>>> + if (regs && kexec_should_crash(current))
>>>>>> + crash_kexec(regs);
>>>>>> +
>>>>>> if (in_interrupt())
>>>>>> panic("Fatal exception in interrupt");
>>>>>>
>>>>>> --
>>>>>> 2.36.0
>>>>>>
>>>>
>>
Hi, Youling,
On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
>
>
>
> On 09/05/2022 10:14 AM, Huacai Chen wrote:
> > On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >> Hi, Huacai
> >>
> >> On 09/05/2022 09:38 AM, Huacai Chen wrote:
> >>> Hi, Youling,
> >>>
> >>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>
> >>>> Hi, Huacai
> >>>>
> >>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> >>>>> Hi, Youling,
> >>>>>
> >>>>> I think crash.c can be merged into crash_dump.c
> >>>>
> >>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> >>>> I'm not sure if merging crash.c into crash_dump.c will break its
> >>>> consistency?
> >>>>
> >>>> Thanks,
> >>>> Youling
> >>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> >>> can be merged into machine_kexec.c, as arm64 and riscv do.
> >>
> >> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
> >> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
> >> into machine_kexec.c, should crash_shutdown_secondary and
> >> crash_smp_send_stop be placed in smp.c?
> > I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
>
> Ok, I'll merge all into machine_kexec.c.
>
> Youling.
Another problem: 0x9000000091000000 for PHYSICAL_START is too tricky.
If you want to skip the "low memory", maybe we can use
0x9000000090000000 or 0x90000000a0000000 instead?
Huacai
>
> >
> > Huacai
> >>
> >> Youling.
> >>>
> >>> Huacai
> >>>>
> >>>>>
> >>>>> Huacai
> >>>>>
> >>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>>>
> >>>>>> This patch adds support for kdump, the kernel will reserve a region
> >>>>>> for the crash kernel and jump there on panic.
> >>>>>>
> >>>>>> Arch-specific functions are added to allow for implementing a crash
> >>>>>> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
> >>>>>>
> >>>>>> A user space tool, like kexec-tools, is responsible for allocating a
> >>>>>> separate region for the core's ELF header within crash kdump kernel
> >>>>>> memory and filling it in when executing kexec_load().
> >>>>>>
> >>>>>> Then, its location will be advertised to crash dump kernel via a new
> >>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>>>>>
> >>>>>> At the same time, it will also limit the crash kdump kernel to the
> >>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
> >>>>>> so as not to destroy the original kernel dump data.
> >>>>>>
> >>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >>>>>> with copy_oldmem_page().
> >>>>>>
> >>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> >>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"); you may test
> >>>>>> it by triggering a crash through /proc/sysrq-trigger:
> >>>>>>
> >>>>>> $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>>>>> # echo c > /proc/sysrq-trigger
> >>>>>>
> >>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >>>>>> ---
> >>>>>> arch/loongarch/Kconfig | 22 ++++++
> >>>>>> arch/loongarch/Makefile | 4 +
> >>>>>> arch/loongarch/kernel/Makefile | 3 +-
> >>>>>> arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
> >>>>>> arch/loongarch/kernel/crash_dump.c | 19 +++++
> >>>>>> arch/loongarch/kernel/machine_kexec.c | 12 ++-
> >>>>>> arch/loongarch/kernel/mem.c | 6 ++
> >>>>>> arch/loongarch/kernel/relocate_kernel.S | 6 ++
> >>>>>> arch/loongarch/kernel/setup.c | 49 ++++++++++++
> >>>>>> arch/loongarch/kernel/traps.c | 4 +
> >>>>>> 10 files changed, 217 insertions(+), 8 deletions(-)
> >>>>>> create mode 100644 arch/loongarch/kernel/crash.c
> >>>>>> create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
> >>>>>> --- a/arch/loongarch/Kconfig
> >>>>>> +++ b/arch/loongarch/Kconfig
> >>>>>> @@ -420,6 +420,28 @@ config KEXEC
> >>>>>>
> >>>>>> The name comes from the similarity to the exec system call.
> >>>>>>
> >>>>>> +config CRASH_DUMP
> >>>>>> + bool "Build kdump crash kernel"
> >>>>>> + help
> >>>>>> + Generate crash dump after being started by kexec. This should
> >>>>>> + be normally only set in special crash dump kernels which are
> >>>>>> + loaded in the main kernel with kexec-tools into a specially
> >>>>>> + reserved region and then later executed after a crash by
> >>>>>> + kdump/kexec.
> >>>>>> +
> >>>>>> + For more details see Documentation/admin-guide/kdump/kdump.rst
> >>>>>> +
> >>>>>> +config PHYSICAL_START
> >>>>>> + hex "Physical address where the kernel is loaded"
> >>>>>> + default "0x9000000091000000" if 64BIT
> >>>>>> + depends on CRASH_DUMP
> >>>>>> + help
> >>>>>> + This gives the XKPRANGE address where the kernel is loaded.
> >>>>>> + If you plan to use kernel for capturing the crash dump change
> >>>>>> + this value to start of the reserved region (the "X" value as
> >>>>>> + specified in the "crashkernel=YM@XM" command line boot parameter
> >>>>>> + passed to the panic-ed kernel).
> >>>>>> +
> >>>>>> config SECCOMP
> >>>>>> bool "Enable seccomp to safely compute untrusted bytecode"
> >>>>>> depends on PROC_FS
> >>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >>>>>> index 4bc47f47cfd8..7dabd580426d 100644
> >>>>>> --- a/arch/loongarch/Makefile
> >>>>>> +++ b/arch/loongarch/Makefile
> >>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>>>>> cflags-y += -ffreestanding
> >>>>>> cflags-y += $(call cc-option, -mno-check-zero-division)
> >>>>>>
> >>>>>> +ifdef CONFIG_PHYSICAL_START
> >>>>>> +load-y = $(CONFIG_PHYSICAL_START)
> >>>>>> +else
> >>>>>> load-y = 0x9000000000200000
> >>>>>> +endif
> >>>>>> bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>>>>>
> >>>>>> drivers-$(CONFIG_PCI) += arch/loongarch/pci/
> >>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >>>>>> index 20b64ac3f128..df5aea129364 100644
> >>>>>> --- a/arch/loongarch/kernel/Makefile
> >>>>>> +++ b/arch/loongarch/kernel/Makefile
> >>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
> >>>>>> obj-$(CONFIG_MODULES) += module.o module-sections.o
> >>>>>> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> >>>>>>
> >>>>>> -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
> >>>>>> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
> >>>>>> +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
> >>>>>>
> >>>>>> obj-$(CONFIG_PROC_FS) += proc.o
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >>>>>> new file mode 100644
> >>>>>> index 000000000000..b4f249ec6301
> >>>>>> --- /dev/null
> >>>>>> +++ b/arch/loongarch/kernel/crash.c
> >>>>>> @@ -0,0 +1,100 @@
> >>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>> +/*
> >>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >>>>>> + *
> >>>>>> + * Derived from MIPS
> >>>>>> + */
> >>>>>> +#include <linux/kernel.h>
> >>>>>> +#include <linux/smp.h>
> >>>>>> +#include <linux/reboot.h>
> >>>>>> +#include <linux/crash_dump.h>
> >>>>>> +#include <linux/delay.h>
> >>>>>> +#include <linux/irq.h>
> >>>>>> +#include <linux/types.h>
> >>>>>> +#include <linux/sched.h>
> >>>>>> +#include <linux/sched/task_stack.h>
> >>>>>> +#include <asm/cacheflush.h>
> >>>>>> +#include <asm/kexec.h>
> >>>>>> +
> >>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >>>>>> +
> >>>>>> +#ifdef CONFIG_SMP
> >>>>>> +static void crash_shutdown_secondary(void *passed_regs)
> >>>>>> +{
> >>>>>> + struct pt_regs *regs = passed_regs;
> >>>>>> + int cpu = smp_processor_id();
> >>>>>> +
> >>>>>> + /*
> >>>>>> + * If we are passed registers, use those. Otherwise get the
> >>>>>> + * regs from the last interrupt, which should be correct, as
> >>>>>> + * we are in an interrupt. But if the regs are not there,
> >>>>>> + * pull them from the top of the stack. They are probably
> >>>>>> + * wrong, but we need something to keep from crashing again.
> >>>>>> + */
> >>>>>> + if (!regs)
> >>>>>> + regs = get_irq_regs();
> >>>>>> + if (!regs)
> >>>>>> + regs = task_pt_regs(current);
> >>>>>> +
> >>>>>> + local_irq_disable();
> >>>>>> + if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >>>>>> + crash_save_cpu(regs, cpu);
> >>>>>> + cpumask_set_cpu(cpu, &cpus_in_crash);
> >>>>>> +
> >>>>>> + while (!atomic_read(&kexec_ready_to_reboot))
> >>>>>> + cpu_relax();
> >>>>>> +
> >>>>>> + kexec_reboot();
> >>>>>> +}
> >>>>>> +
> >>>>>> +/* Override the weak function in kernel/panic.c */
> >>>>>> +void crash_smp_send_stop(void)
> >>>>>> +{
> >>>>>> + static int cpus_stopped;
> >>>>>> + unsigned long timeout;
> >>>>>> + unsigned int ncpus;
> >>>>>> +
> >>>>>> + /*
> >>>>>> + * This function can be called twice in panic path, but obviously
> >>>>>> + * we execute this only once.
> >>>>>> + */
> >>>>>> + if (cpus_stopped)
> >>>>>> + return;
> >>>>>> +
> >>>>>> + cpus_stopped = 1;
> >>>>>> +
> >>>>>> + /* Excluding the panic cpu */
> >>>>>> + ncpus = num_online_cpus() - 1;
> >>>>>> +
> >>>>>> + smp_call_function(crash_shutdown_secondary, NULL, 0);
> >>>>>> + smp_wmb();
> >>>>>> +
> >>>>>> + /*
> >>>>>> + * The crash CPU sends an IPI and wait for other CPUs to
> >>>>>> + * respond. Delay of at least 10 seconds.
> >>>>>> + */
> >>>>>> + pr_emerg("Sending IPI to other cpus...\n");
> >>>>>> + timeout = USEC_PER_SEC * 10;
> >>>>>> + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >>>>>> + cpu_relax();
> >>>>>> + udelay(1);
> >>>>>> + }
> >>>>>> +}
> >>>>>> +
> >>>>>> +#endif
> >>>>>> +
> >>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>> +{
> >>>>>> + int crashing_cpu;
> >>>>>> +
> >>>>>> + local_irq_disable();
> >>>>>> +
> >>>>>> + crashing_cpu = smp_processor_id();
> >>>>>> + crash_save_cpu(regs, crashing_cpu);
> >>>>>> +
> >>>>>> + /* shutdown non-crashing cpus */
> >>>>>> + crash_smp_send_stop();
> >>>>>> + cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >>>>>> +
> >>>>>> + pr_info("Starting crashdump kernel...\n");
> >>>>>> +}
> >>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >>>>>> new file mode 100644
> >>>>>> index 000000000000..13e5d2f7870d
> >>>>>> --- /dev/null
> >>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
> >>>>>> @@ -0,0 +1,19 @@
> >>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>> +#include <linux/highmem.h>
> >>>>>> +#include <linux/crash_dump.h>
> >>>>>> +#include <linux/io.h>
> >>>>>> +
> >>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >>>>>> + size_t csize, unsigned long offset)
> >>>>>> +{
> >>>>>> + void *vaddr;
> >>>>>> +
> >>>>>> + if (!csize)
> >>>>>> + return 0;
> >>>>>> +
> >>>>>> + vaddr = kmap_local_pfn(pfn);
> >>>>>> + csize = copy_to_iter(vaddr + offset, csize, iter);
> >>>>>> + kunmap_local(vaddr);
> >>>>>> +
> >>>>>> + return csize;
> >>>>>> +}
> >>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
> >>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
> >>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>>>>> continue;
> >>>>>> }
> >>>>>>
> >>>>>> - /* kexec need a safe page to save reboot_code_buffer */
> >>>>>> + /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>>>>> kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>>>>>
> >>>>>> reboot_code_buffer =
> >>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>>>>>
> >>>>>> kexec_reboot();
> >>>>>> }
> >>>>>> -
> >>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>> -{
> >>>>>> -}
> >>>>>> #endif
> >>>>>>
> >>>>>> void machine_shutdown(void)
> >>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>
> >>>>>> jump_addr = (unsigned long)phys_to_virt(image->start);
> >>>>>>
> >>>>>> - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>> + if (image->type == KEXEC_TYPE_DEFAULT)
> >>>>>> + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>>
> >>>>>> /*
> >>>>>> * The generic kexec code builds a page list with physical
> >>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>
> >>>>>> /*
> >>>>>> * We know we were online, and there will be no incoming IPIs at
> >>>>>> - * this point.
> >>>>>> + * this point. Mark online again before rebooting so that the crash
> >>>>>> + * analysis tool will see us correctly.
> >>>>>> */
> >>>>>> set_cpu_online(smp_processor_id(), true);
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >>>>>> index 7423361b0ebc..c6def6ff81c8 100644
> >>>>>> --- a/arch/loongarch/kernel/mem.c
> >>>>>> +++ b/arch/loongarch/kernel/mem.c
> >>>>>> @@ -5,6 +5,7 @@
> >>>>>> #include <linux/efi.h>
> >>>>>> #include <linux/initrd.h>
> >>>>>> #include <linux/memblock.h>
> >>>>>> +#include <linux/of_fdt.h>
> >>>>>>
> >>>>>> #include <asm/bootinfo.h>
> >>>>>> #include <asm/loongson.h>
> >>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>>>>>
> >>>>>> /* Reserve the initrd */
> >>>>>> reserve_initrd_mem();
> >>>>>> +
> >>>>>> + /* Mainly reserved memory for the elf core head */
> >>>>>> + early_init_fdt_scan_reserved_mem();
> >>>>>> + /* Parse linux,usable-memory-range is for crash dump kernel */
> >>>>>> + early_init_dt_check_for_usable_mem_range();
> >>>>>> }
> >>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
> >>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>>>>> move s2, a2
> >>>>>> move s3, a3
> >>>>>>
> >>>>>> + /*
> >>>>>> + * In case of a kdump/crash kernel, the indirection page is not
> >>>>>> + * populated as the kernel is directly copied to a reserved location
> >>>>>> + */
> >>>>>> + beqz s2, done
> >>>>>> +
> >>>>>> process_entry:
> >>>>>> PTR_L s4, s2, 0
> >>>>>> PTR_ADDI s2, s2, SZREG
> >>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >>>>>> index f938aae3e92c..ea34b77e402f 100644
> >>>>>> --- a/arch/loongarch/kernel/setup.c
> >>>>>> +++ b/arch/loongarch/kernel/setup.c
> >>>>>> @@ -19,6 +19,8 @@
> >>>>>> #include <linux/memblock.h>
> >>>>>> #include <linux/initrd.h>
> >>>>>> #include <linux/ioport.h>
> >>>>>> +#include <linux/kexec.h>
> >>>>>> +#include <linux/crash_dump.h>
> >>>>>> #include <linux/root_dev.h>
> >>>>>> #include <linux/console.h>
> >>>>>> #include <linux/pfn.h>
> >>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>>>>> }
> >>>>>> early_param("mem", early_parse_mem);
> >>>>>>
> >>>>>> +static void __init loongarch_parse_crashkernel(void)
> >>>>>> +{
> >>>>>> +#ifdef CONFIG_KEXEC
> >>>>>> + unsigned long long start;
> >>>>>> + unsigned long long total_mem;
> >>>>>> + unsigned long long crash_size, crash_base;
> >>>>>> + int ret;
> >>>>>> +
> >>>>>> + total_mem = memblock_phys_mem_size();
> >>>>>> + ret = parse_crashkernel(boot_command_line, total_mem,
> >>>>>> + &crash_size, &crash_base);
> >>>>>> + if (ret != 0 || crash_size <= 0)
> >>>>>> + return;
> >>>>>> +
> >>>>>> +
> >>>>>> + start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >>>>>> + crash_base + crash_size);
> >>>>>> + if (start != crash_base) {
> >>>>>> + pr_warn("Invalid memory region reserved for crash kernel\n");
> >>>>>> + return;
> >>>>>> + }
> >>>>>> +
> >>>>>> + crashk_res.start = crash_base;
> >>>>>> + crashk_res.end = crash_base + crash_size - 1;
> >>>>>> +#endif
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void __init request_crashkernel(struct resource *res)
> >>>>>> +{
> >>>>>> +#ifdef CONFIG_KEXEC
> >>>>>> + int ret;
> >>>>>> +
> >>>>>> + if (crashk_res.start == crashk_res.end)
> >>>>>> + return;
> >>>>>> +
> >>>>>> + ret = request_resource(res, &crashk_res);
> >>>>>> + if (!ret)
> >>>>>> + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >>>>>> + (unsigned long)((crashk_res.end -
> >>>>>> + crashk_res.start + 1) >> 20),
> >>>>>> + (unsigned long)(crashk_res.start >> 20));
> >>>>>> +#endif
> >>>>>> +}
> >>>>>> +
> >>>>>> void __init platform_init(void)
> >>>>>> {
> >>>>>> efi_init();
> >>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>>>>>
> >>>>>> check_kernel_sections_mem();
> >>>>>>
> >>>>>> + loongarch_parse_crashkernel();
> >>>>>> +
> >>>>>> /*
> >>>>>> * In order to reduce the possibility of kernel panic when failed to
> >>>>>> * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>>>>> request_resource(res, &code_resource);
> >>>>>> request_resource(res, &data_resource);
> >>>>>> request_resource(res, &bss_resource);
> >>>>>> + request_crashkernel(res);
> >>>>>> }
> >>>>>> }
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >>>>>> index aa1c95aaf595..0e610872f3f4 100644
> >>>>>> --- a/arch/loongarch/kernel/traps.c
> >>>>>> +++ b/arch/loongarch/kernel/traps.c
> >>>>>> @@ -10,6 +10,7 @@
> >>>>>> #include <linux/entry-common.h>
> >>>>>> #include <linux/init.h>
> >>>>>> #include <linux/kernel.h>
> >>>>>> +#include <linux/kexec.h>
> >>>>>> #include <linux/module.h>
> >>>>>> #include <linux/extable.h>
> >>>>>> #include <linux/mm.h>
> >>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>>>>>
> >>>>>> oops_exit();
> >>>>>>
> >>>>>> + if (regs && kexec_should_crash(current))
> >>>>>> + crash_kexec(regs);
> >>>>>> +
> >>>>>> if (in_interrupt())
> >>>>>> panic("Fatal exception in interrupt");
> >>>>>>
> >>>>>> --
> >>>>>> 2.36.0
> >>>>>>
> >>>>
> >>
>
Hi, Huacai
On 09/05/2022 03:32 PM, Huacai Chen wrote:
> Hi, Youling,
>
> On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>>
>>
>> On 09/05/2022 10:14 AM, Huacai Chen wrote:
>>> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>> Hi, Huacai
>>>>
>>>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
>>>>> Hi, Youling,
>>>>>
>>>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>
>>>>>> Hi, Huacai
>>>>>>
>>>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>>>>>> Hi, Youling,
>>>>>>>
>>>>>>> I think crash.c can be merged into crash_dump.c
>>>>>>
>>>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>>>>>> I'm not sure if merging crash.c into crash_dump.c will break its
>>>>>> consistency?
>>>>>>
>>>>>> Thanks,
>>>>>> Youling
>>>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
>>>>> can be merged into machine_kexec.c, as arm64 and riscv do.
>>>>
>>>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
>>>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
>>>> into machine_kexec.c, should crash_shutdown_secondary and
>>>> crash_smp_send_stop be placed in smp.c?
>>> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
>>
>> Ok, I'll merge all into machine_kexec.c.
>>
>> Youling.
> Another problem, 0x9000000091000000 for PHYSICAL_START is too tricky.
> If you want to skip the "low memory", maybe we can use
> 0x9000000090000000 or 0x90000000a0000000?
Because there are many holes in our memory layout, if PHYSICAL_START is
set to 0x90000000a0000000, the largest area that can be reserved for the
crashkernel is 512M; any larger reservation will fail.
# cat /proc/iomem
90400000-bfffffff : System RAM
c0020000-f9efffff : System RAM
f6810000-f6813fff : Reserved
The second System RAM region starts at 0x90400000, so 0x9000000090000000
would be too low (it falls below the start of that region).
Youling.
>
> Huacai
>>
>>>
>>> Huacai
>>>>
>>>> Youling.
>>>>>
>>>>> Huacai
>>>>>>
>>>>>>>
>>>>>>> Huacai
>>>>>>>
>>>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>>>
>>>>>>>> This patch adds support for kdump, the kernel will reserve a region
>>>>>>>> for the crash kernel and jump there on panic.
>>>>>>>>
>>>>>>>> Arch-specific functions are added to allow for implementing a crash
>>>>>>>> dump file interface, /proc/vmcore, which can be viewed as a ELF file.
>>>>>>>>
>>>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>>>>>> separate region for the core's ELF header within crash kdump kernel
>>>>>>>> memory and filling it in when executing kexec_load().
>>>>>>>>
>>>>>>>> Then, its location will be advertised to crash dump kernel via a new
>>>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>>>>>
>>>>>>>> At the same time, it will also limit the crash kdump kernel to the
>>>>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>>>>>>>> so as not to destroy the original kernel dump data.
>>>>>>>>
>>>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>>>>>> with copy_oldmem_page().
>>>>>>>>
>>>>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>>>>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"); you may test
>>>>>>>> it by triggering a crash through /proc/sysrq-trigger:
>>>>>>>>
>>>>>>>> $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>>>>> # echo c > /proc/sysrq-trigger
>>>>>>>>
>>>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>>>>>> ---
>>>>>>>> arch/loongarch/Kconfig | 22 ++++++
>>>>>>>> arch/loongarch/Makefile | 4 +
>>>>>>>> arch/loongarch/kernel/Makefile | 3 +-
>>>>>>>> arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
>>>>>>>> arch/loongarch/kernel/crash_dump.c | 19 +++++
>>>>>>>> arch/loongarch/kernel/machine_kexec.c | 12 ++-
>>>>>>>> arch/loongarch/kernel/mem.c | 6 ++
>>>>>>>> arch/loongarch/kernel/relocate_kernel.S | 6 ++
>>>>>>>> arch/loongarch/kernel/setup.c | 49 ++++++++++++
>>>>>>>> arch/loongarch/kernel/traps.c | 4 +
>>>>>>>> 10 files changed, 217 insertions(+), 8 deletions(-)
>>>>>>>> create mode 100644 arch/loongarch/kernel/crash.c
>>>>>>>> create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>>>>>> --- a/arch/loongarch/Kconfig
>>>>>>>> +++ b/arch/loongarch/Kconfig
>>>>>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>>>>>
>>>>>>>> The name comes from the similarity to the exec system call.
>>>>>>>>
>>>>>>>> +config CRASH_DUMP
>>>>>>>> + bool "Build kdump crash kernel"
>>>>>>>> + help
>>>>>>>> + Generate crash dump after being started by kexec. This should
>>>>>>>> + be normally only set in special crash dump kernels which are
>>>>>>>> + loaded in the main kernel with kexec-tools into a specially
>>>>>>>> + reserved region and then later executed after a crash by
>>>>>>>> + kdump/kexec.
>>>>>>>> +
>>>>>>>> + For more details see Documentation/admin-guide/kdump/kdump.rst
>>>>>>>> +
>>>>>>>> +config PHYSICAL_START
>>>>>>>> + hex "Physical address where the kernel is loaded"
>>>>>>>> + default "0x9000000091000000" if 64BIT
>>>>>>>> + depends on CRASH_DUMP
>>>>>>>> + help
>>>>>>>> + This gives the XKPRANGE address where the kernel is loaded.
>>>>>>>> + If you plan to use kernel for capturing the crash dump change
>>>>>>>> + this value to start of the reserved region (the "X" value as
>>>>>>>> + specified in the "crashkernel=YM@XM" command line boot parameter
>>>>>>>> + passed to the panic-ed kernel).
>>>>>>>> +
>>>>>>>> config SECCOMP
>>>>>>>> bool "Enable seccomp to safely compute untrusted bytecode"
>>>>>>>> depends on PROC_FS
>>>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>>>>>> --- a/arch/loongarch/Makefile
>>>>>>>> +++ b/arch/loongarch/Makefile
>>>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>>>>> cflags-y += -ffreestanding
>>>>>>>> cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>>>>>
>>>>>>>> +ifdef CONFIG_PHYSICAL_START
>>>>>>>> +load-y = $(CONFIG_PHYSICAL_START)
>>>>>>>> +else
>>>>>>>> load-y = 0x9000000000200000
>>>>>>>> +endif
>>>>>>>> bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>>>>>
>>>>>>>> drivers-$(CONFIG_PCI) += arch/loongarch/pci/
>>>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>>>>>> index 20b64ac3f128..df5aea129364 100644
>>>>>>>> --- a/arch/loongarch/kernel/Makefile
>>>>>>>> +++ b/arch/loongarch/kernel/Makefile
>>>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
>>>>>>>> obj-$(CONFIG_MODULES) += module.o module-sections.o
>>>>>>>> obj-$(CONFIG_STACKTRACE) += stacktrace.o
>>>>>>>>
>>>>>>>> -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
>>>>>>>> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
>>>>>>>> +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
>>>>>>>>
>>>>>>>> obj-$(CONFIG_PROC_FS) += proc.o
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..b4f249ec6301
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/arch/loongarch/kernel/crash.c
>>>>>>>> @@ -0,0 +1,100 @@
>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>> +/*
>>>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>>>>>> + *
>>>>>>>> + * Derived from MIPS
>>>>>>>> + */
>>>>>>>> +#include <linux/kernel.h>
>>>>>>>> +#include <linux/smp.h>
>>>>>>>> +#include <linux/reboot.h>
>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>> +#include <linux/delay.h>
>>>>>>>> +#include <linux/irq.h>
>>>>>>>> +#include <linux/types.h>
>>>>>>>> +#include <linux/sched.h>
>>>>>>>> +#include <linux/sched/task_stack.h>
>>>>>>>> +#include <asm/cacheflush.h>
>>>>>>>> +#include <asm/kexec.h>
>>>>>>>> +
>>>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>>>>>> +
>>>>>>>> +#ifdef CONFIG_SMP
>>>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>>>>>> +{
>>>>>>>> + struct pt_regs *regs = passed_regs;
>>>>>>>> + int cpu = smp_processor_id();
>>>>>>>> +
>>>>>>>> + /*
>>>>>>>> + * If we are passed registers, use those. Otherwise get the
>>>>>>>> + * regs from the last interrupt, which should be correct, as
>>>>>>>> + * we are in an interrupt. But if the regs are not there,
>>>>>>>> + * pull them from the top of the stack. They are probably
>>>>>>>> + * wrong, but we need something to keep from crashing again.
>>>>>>>> + */
>>>>>>>> + if (!regs)
>>>>>>>> + regs = get_irq_regs();
>>>>>>>> + if (!regs)
>>>>>>>> + regs = task_pt_regs(current);
>>>>>>>> +
>>>>>>>> + local_irq_disable();
>>>>>>>> + if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>>>>>> + crash_save_cpu(regs, cpu);
>>>>>>>> + cpumask_set_cpu(cpu, &cpus_in_crash);
>>>>>>>> +
>>>>>>>> + while (!atomic_read(&kexec_ready_to_reboot))
>>>>>>>> + cpu_relax();
>>>>>>>> +
>>>>>>>> + kexec_reboot();
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/* Override the weak function in kernel/panic.c */
>>>>>>>> +void crash_smp_send_stop(void)
>>>>>>>> +{
>>>>>>>> + static int cpus_stopped;
>>>>>>>> + unsigned long timeout;
>>>>>>>> + unsigned int ncpus;
>>>>>>>> +
>>>>>>>> + /*
>>>>>>>> + * This function can be called twice in panic path, but obviously
>>>>>>>> + * we execute this only once.
>>>>>>>> + */
>>>>>>>> + if (cpus_stopped)
>>>>>>>> + return;
>>>>>>>> +
>>>>>>>> + cpus_stopped = 1;
>>>>>>>> +
>>>>>>>> + /* Excluding the panic cpu */
>>>>>>>> + ncpus = num_online_cpus() - 1;
>>>>>>>> +
>>>>>>>> + smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>>>>>> + smp_wmb();
>>>>>>>> +
>>>>>>>> + /*
>>>>>>>> + * The crash CPU sends an IPI and wait for other CPUs to
>>>>>>>> + * respond. Delay of at least 10 seconds.
>>>>>>>> + */
>>>>>>>> + pr_emerg("Sending IPI to other cpus...\n");
>>>>>>>> + timeout = USEC_PER_SEC * 10;
>>>>>>>> + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>>>>>> + cpu_relax();
>>>>>>>> + udelay(1);
>>>>>>>> + }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>> +{
>>>>>>>> + int crashing_cpu;
>>>>>>>> +
>>>>>>>> + local_irq_disable();
>>>>>>>> +
>>>>>>>> + crashing_cpu = smp_processor_id();
>>>>>>>> + crash_save_cpu(regs, crashing_cpu);
>>>>>>>> +
>>>>>>>> + /* shutdown non-crashing cpus */
>>>>>>>> + crash_smp_send_stop();
>>>>>>>> + cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>>>>>> +
>>>>>>>> + pr_info("Starting crashdump kernel...\n");
>>>>>>>> +}
>>>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..13e5d2f7870d
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>>>>>> @@ -0,0 +1,19 @@
>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>> +#include <linux/highmem.h>
>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>> +#include <linux/io.h>
>>>>>>>> +
>>>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>>>>>> + size_t csize, unsigned long offset)
>>>>>>>> +{
>>>>>>>> + void *vaddr;
>>>>>>>> +
>>>>>>>> + if (!csize)
>>>>>>>> + return 0;
>>>>>>>> +
>>>>>>>> + vaddr = kmap_local_pfn(pfn);
>>>>>>>> + csize = copy_to_iter(vaddr + offset, csize, iter);
>>>>>>>> + kunmap_local(vaddr);
>>>>>>>> +
>>>>>>>> + return csize;
>>>>>>>> +}
>>>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>>>>> continue;
>>>>>>>> }
>>>>>>>>
>>>>>>>> - /* kexec need a safe page to save reboot_code_buffer */
>>>>>>>> + /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>>>>> kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>>>>>
>>>>>>>> reboot_code_buffer =
>>>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>>>>>
>>>>>>>> kexec_reboot();
>>>>>>>> }
>>>>>>>> -
>>>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>> -{
>>>>>>>> -}
>>>>>>>> #endif
>>>>>>>>
>>>>>>>> void machine_shutdown(void)
>>>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>
>>>>>>>> jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>>>>>
>>>>>>>> - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>> + if (image->type == KEXEC_TYPE_DEFAULT)
>>>>>>>> + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>>
>>>>>>>> /*
>>>>>>>> * The generic kexec code builds a page list with physical
>>>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>
>>>>>>>> /*
>>>>>>>> * We know we were online, and there will be no incoming IPIs at
>>>>>>>> - * this point.
>>>>>>>> + * this point. Mark online again before rebooting so that the crash
>>>>>>>> + * analysis tool will see us correctly.
>>>>>>>> */
>>>>>>>> set_cpu_online(smp_processor_id(), true);
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>>>>>> --- a/arch/loongarch/kernel/mem.c
>>>>>>>> +++ b/arch/loongarch/kernel/mem.c
>>>>>>>> @@ -5,6 +5,7 @@
>>>>>>>> #include <linux/efi.h>
>>>>>>>> #include <linux/initrd.h>
>>>>>>>> #include <linux/memblock.h>
>>>>>>>> +#include <linux/of_fdt.h>
>>>>>>>>
>>>>>>>> #include <asm/bootinfo.h>
>>>>>>>> #include <asm/loongson.h>
>>>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>>>>>
>>>>>>>> /* Reserve the initrd */
>>>>>>>> reserve_initrd_mem();
>>>>>>>> +
>>>>>>>> + /* Mainly reserved memory for the elf core head */
>>>>>>>> + early_init_fdt_scan_reserved_mem();
>>>>>>>> + /* Parse linux,usable-memory-range is for crash dump kernel */
>>>>>>>> + early_init_dt_check_for_usable_mem_range();
>>>>>>>> }
>>>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>>>>> move s2, a2
>>>>>>>> move s3, a3
>>>>>>>>
>>>>>>>> + /*
>>>>>>>> + * In case of a kdump/crash kernel, the indirection page is not
>>>>>>>> + * populated as the kernel is directly copied to a reserved location
>>>>>>>> + */
>>>>>>>> + beqz s2, done
>>>>>>>> +
>>>>>>>> process_entry:
>>>>>>>> PTR_L s4, s2, 0
>>>>>>>> PTR_ADDI s2, s2, SZREG
>>>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>>>>>> index f938aae3e92c..ea34b77e402f 100644
>>>>>>>> --- a/arch/loongarch/kernel/setup.c
>>>>>>>> +++ b/arch/loongarch/kernel/setup.c
>>>>>>>> @@ -19,6 +19,8 @@
>>>>>>>> #include <linux/memblock.h>
>>>>>>>> #include <linux/initrd.h>
>>>>>>>> #include <linux/ioport.h>
>>>>>>>> +#include <linux/kexec.h>
>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>> #include <linux/root_dev.h>
>>>>>>>> #include <linux/console.h>
>>>>>>>> #include <linux/pfn.h>
>>>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>>>>> }
>>>>>>>> early_param("mem", early_parse_mem);
>>>>>>>>
>>>>>>>> +static void __init loongarch_parse_crashkernel(void)
>>>>>>>> +{
>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>> + unsigned long long start;
>>>>>>>> + unsigned long long total_mem;
>>>>>>>> + unsigned long long crash_size, crash_base;
>>>>>>>> + int ret;
>>>>>>>> +
>>>>>>>> + total_mem = memblock_phys_mem_size();
>>>>>>>> + ret = parse_crashkernel(boot_command_line, total_mem,
>>>>>>>> + &crash_size, &crash_base);
>>>>>>>> + if (ret != 0 || crash_size <= 0)
>>>>>>>> + return;
>>>>>>>> +
>>>>>>>> +
>>>>>>>> + start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>>>>>> + crash_base + crash_size);
>>>>>>>> + if (start != crash_base) {
>>>>>>>> + pr_warn("Invalid memory region reserved for crash kernel\n");
>>>>>>>> + return;
>>>>>>>> + }
>>>>>>>> +
>>>>>>>> + crashk_res.start = crash_base;
>>>>>>>> + crashk_res.end = crash_base + crash_size - 1;
>>>>>>>> +#endif
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void __init request_crashkernel(struct resource *res)
>>>>>>>> +{
>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>> + int ret;
>>>>>>>> +
>>>>>>>> + if (crashk_res.start == crashk_res.end)
>>>>>>>> + return;
>>>>>>>> +
>>>>>>>> + ret = request_resource(res, &crashk_res);
>>>>>>>> + if (!ret)
>>>>>>>> + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>>>>>> + (unsigned long)((crashk_res.end -
>>>>>>>> + crashk_res.start + 1) >> 20),
>>>>>>>> + (unsigned long)(crashk_res.start >> 20));
>>>>>>>> +#endif
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> void __init platform_init(void)
>>>>>>>> {
>>>>>>>> efi_init();
>>>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>>>>>
>>>>>>>> check_kernel_sections_mem();
>>>>>>>>
>>>>>>>> + loongarch_parse_crashkernel();
>>>>>>>> +
>>>>>>>> /*
>>>>>>>> * In order to reduce the possibility of kernel panic when failed to
>>>>>>>> * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>>>>> request_resource(res, &code_resource);
>>>>>>>> request_resource(res, &data_resource);
>>>>>>>> request_resource(res, &bss_resource);
>>>>>>>> + request_crashkernel(res);
>>>>>>>> }
>>>>>>>> }
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>>>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>>>>>> --- a/arch/loongarch/kernel/traps.c
>>>>>>>> +++ b/arch/loongarch/kernel/traps.c
>>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>> #include <linux/entry-common.h>
>>>>>>>> #include <linux/init.h>
>>>>>>>> #include <linux/kernel.h>
>>>>>>>> +#include <linux/kexec.h>
>>>>>>>> #include <linux/module.h>
>>>>>>>> #include <linux/extable.h>
>>>>>>>> #include <linux/mm.h>
>>>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>>>>>
>>>>>>>> oops_exit();
>>>>>>>>
>>>>>>>> + if (regs && kexec_should_crash(current))
>>>>>>>> + crash_kexec(regs);
>>>>>>>> +
>>>>>>>> if (in_interrupt())
>>>>>>>> panic("Fatal exception in interrupt");
>>>>>>>>
>>>>>>>> --
>>>>>>>> 2.36.0
>>>>>>>>
>>>>>>
>>>>
>>
Hi, Youling,
On Mon, Sep 5, 2022 at 3:45 PM Youling Tang <tangyouling@loongson.cn> wrote:
>
> Hi, Huacai
>
> On 09/05/2022 03:32 PM, Huacai Chen wrote:
> > Hi, Youling,
> >
> > On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>
> >>
> >>
> >> On 09/05/2022 10:14 AM, Huacai Chen wrote:
> >>> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>
> >>>> Hi, Huacai
> >>>>
> >>>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
> >>>>> Hi, Youling,
> >>>>>
> >>>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>>>
> >>>>>> Hi, Huacai
> >>>>>>
> >>>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
> >>>>>>> Hi, Youling,
> >>>>>>>
> >>>>>>> I think crash.c can be merged into crash_dump.c
> >>>>>>
> >>>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
> >>>>>> I'm not sure if merging crash.c into crash_dump.c will break its
> >>>>>> consistency?
> >>>>>>
> >>>>>> Thanks,
> >>>>>> Youling
> >>>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
> >>>>> can be merged into machine_kexec.c, as arm64 and riscv do.
> >>>>
> >>>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
> >>>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
> >>>> into machine_kexec.c, should crash_shutdown_secondary and
> >>>> crash_smp_send_stop be placed in smp.c?
> >>> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
> >>
> >> Ok, I'll merge all into machine_kexec.c.
> >>
> >> Youling.
> > Another problem, 0x9000000091000000 for PHYSICAL_START is too tricky.
> > If you want to skip the "low memory", maybe we can use
> > 0x9000000090000000 or 0x90000000a0000000?
>
> Because there are many holes in our memory layout, if PHYSICAL_START is
> set to 0x90000000a0000000, the largest reserved area of the crashkernel
> will be 512M, beyond which it will fail.
Then 0x9000000090000000 is not suitable, but I think 512M is enough?
If so, let's use 0x90000000a0000000.
Huacai
>
> # cat /proc/iomem
> 90400000-bfffffff : System RAM
> c0020000-f9efffff : System RAM
> f6810000-f6813fff : Reserved
>
> The second System RAM starts at 0x90400000, so 0x9000000090000000 will
> be too small.
>
> Youling.
>
> >
> > Huacai
> >>
> >>>
> >>> Huacai
> >>>>
> >>>> Youling.
> >>>>>
> >>>>> Huacai
> >>>>>>
> >>>>>>>
> >>>>>>> Huacai
> >>>>>>>
> >>>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
> >>>>>>>>
> >>>>>>>> This patch adds support for kdump, the kernel will reserve a region
> >>>>>>>> for the crash kernel and jump there on panic.
> >>>>>>>>
> >>>>>>>> Arch-specific functions are added to allow for implementing a crash
> >>>>>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
> >>>>>>>>
> >>>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
> >>>>>>>> separate region for the core's ELF header within crash kdump kernel
> >>>>>>>> memory and filling it in when executing kexec_load().
> >>>>>>>>
> >>>>>>>> Then, its location will be advertised to crash dump kernel via a new
> >>>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
> >>>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
> >>>>>>>>
> >>>>>>>> At the same time, it will also limit the crash kdump kernel to the
> >>>>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
> >>>>>>>> so as not to destroy the original kernel dump data.
> >>>>>>>>
> >>>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
> >>>>>>>> with copy_oldmem_page().
> >>>>>>>>
> >>>>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
> >>>>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test it
> >>>>>>>> by triggering a crash through /proc/sysrq-trigger:
> >>>>>>>>
> >>>>>>>> $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
> >>>>>>>> # echo c > /proc/sysrq-trigger
> >>>>>>>>
> >>>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> >>>>>>>> ---
> >>>>>>>> arch/loongarch/Kconfig | 22 ++++++
> >>>>>>>> arch/loongarch/Makefile | 4 +
> >>>>>>>> arch/loongarch/kernel/Makefile | 3 +-
> >>>>>>>> arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
> >>>>>>>> arch/loongarch/kernel/crash_dump.c | 19 +++++
> >>>>>>>> arch/loongarch/kernel/machine_kexec.c | 12 ++-
> >>>>>>>> arch/loongarch/kernel/mem.c | 6 ++
> >>>>>>>> arch/loongarch/kernel/relocate_kernel.S | 6 ++
> >>>>>>>> arch/loongarch/kernel/setup.c | 49 ++++++++++++
> >>>>>>>> arch/loongarch/kernel/traps.c | 4 +
> >>>>>>>> 10 files changed, 217 insertions(+), 8 deletions(-)
> >>>>>>>> create mode 100644 arch/loongarch/kernel/crash.c
> >>>>>>>> create mode 100644 arch/loongarch/kernel/crash_dump.c
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >>>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
> >>>>>>>> --- a/arch/loongarch/Kconfig
> >>>>>>>> +++ b/arch/loongarch/Kconfig
> >>>>>>>> @@ -420,6 +420,28 @@ config KEXEC
> >>>>>>>>
> >>>>>>>> The name comes from the similarity to the exec system call.
> >>>>>>>>
> >>>>>>>> +config CRASH_DUMP
> >>>>>>>> + bool "Build kdump crash kernel"
> >>>>>>>> + help
> >>>>>>>> + Generate crash dump after being started by kexec. This should
> >>>>>>>> + be normally only set in special crash dump kernels which are
> >>>>>>>> + loaded in the main kernel with kexec-tools into a specially
> >>>>>>>> + reserved region and then later executed after a crash by
> >>>>>>>> + kdump/kexec.
> >>>>>>>> +
> >>>>>>>> + For more details see Documentation/admin-guide/kdump/kdump.rst
> >>>>>>>> +
> >>>>>>>> +config PHYSICAL_START
> >>>>>>>> + hex "Physical address where the kernel is loaded"
> >>>>>>>> + default "0x9000000091000000" if 64BIT
> >>>>>>>> + depends on CRASH_DUMP
> >>>>>>>> + help
> >>>>>>>> + This gives the XKPRANGE address where the kernel is loaded.
> >>>>>>>> + If you plan to use kernel for capturing the crash dump change
> >>>>>>>> + this value to start of the reserved region (the "X" value as
> >>>>>>>> + specified in the "crashkernel=YM@XM" command line boot parameter
> >>>>>>>> + passed to the panic-ed kernel).
> >>>>>>>> +
> >>>>>>>> config SECCOMP
> >>>>>>>> bool "Enable seccomp to safely compute untrusted bytecode"
> >>>>>>>> depends on PROC_FS
> >>>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> >>>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
> >>>>>>>> --- a/arch/loongarch/Makefile
> >>>>>>>> +++ b/arch/loongarch/Makefile
> >>>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
> >>>>>>>> cflags-y += -ffreestanding
> >>>>>>>> cflags-y += $(call cc-option, -mno-check-zero-division)
> >>>>>>>>
> >>>>>>>> +ifdef CONFIG_PHYSICAL_START
> >>>>>>>> +load-y = $(CONFIG_PHYSICAL_START)
> >>>>>>>> +else
> >>>>>>>> load-y = 0x9000000000200000
> >>>>>>>> +endif
> >>>>>>>> bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
> >>>>>>>>
> >>>>>>>> drivers-$(CONFIG_PCI) += arch/loongarch/pci/
> >>>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> >>>>>>>> index 20b64ac3f128..df5aea129364 100644
> >>>>>>>> --- a/arch/loongarch/kernel/Makefile
> >>>>>>>> +++ b/arch/loongarch/kernel/Makefile
> >>>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
> >>>>>>>> obj-$(CONFIG_MODULES) += module.o module-sections.o
> >>>>>>>> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> >>>>>>>>
> >>>>>>>> -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
> >>>>>>>> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
> >>>>>>>> +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
> >>>>>>>>
> >>>>>>>> obj-$(CONFIG_PROC_FS) += proc.o
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
> >>>>>>>> new file mode 100644
> >>>>>>>> index 000000000000..b4f249ec6301
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/arch/loongarch/kernel/crash.c
> >>>>>>>> @@ -0,0 +1,100 @@
> >>>>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>>>> +/*
> >>>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
> >>>>>>>> + *
> >>>>>>>> + * Derived from MIPS
> >>>>>>>> + */
> >>>>>>>> +#include <linux/kernel.h>
> >>>>>>>> +#include <linux/smp.h>
> >>>>>>>> +#include <linux/reboot.h>
> >>>>>>>> +#include <linux/crash_dump.h>
> >>>>>>>> +#include <linux/delay.h>
> >>>>>>>> +#include <linux/irq.h>
> >>>>>>>> +#include <linux/types.h>
> >>>>>>>> +#include <linux/sched.h>
> >>>>>>>> +#include <linux/sched/task_stack.h>
> >>>>>>>> +#include <asm/cacheflush.h>
> >>>>>>>> +#include <asm/kexec.h>
> >>>>>>>> +
> >>>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
> >>>>>>>> +
> >>>>>>>> +#ifdef CONFIG_SMP
> >>>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
> >>>>>>>> +{
> >>>>>>>> + struct pt_regs *regs = passed_regs;
> >>>>>>>> + int cpu = smp_processor_id();
> >>>>>>>> +
> >>>>>>>> + /*
> >>>>>>>> + * If we are passed registers, use those. Otherwise get the
> >>>>>>>> + * regs from the last interrupt, which should be correct, as
> >>>>>>>> + * we are in an interrupt. But if the regs are not there,
> >>>>>>>> + * pull them from the top of the stack. They are probably
> >>>>>>>> + * wrong, but we need something to keep from crashing again.
> >>>>>>>> + */
> >>>>>>>> + if (!regs)
> >>>>>>>> + regs = get_irq_regs();
> >>>>>>>> + if (!regs)
> >>>>>>>> + regs = task_pt_regs(current);
> >>>>>>>> +
> >>>>>>>> + local_irq_disable();
> >>>>>>>> + if (!cpumask_test_cpu(cpu, &cpus_in_crash))
> >>>>>>>> + crash_save_cpu(regs, cpu);
> >>>>>>>> + cpumask_set_cpu(cpu, &cpus_in_crash);
> >>>>>>>> +
> >>>>>>>> + while (!atomic_read(&kexec_ready_to_reboot))
> >>>>>>>> + cpu_relax();
> >>>>>>>> +
> >>>>>>>> + kexec_reboot();
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +/* Override the weak function in kernel/panic.c */
> >>>>>>>> +void crash_smp_send_stop(void)
> >>>>>>>> +{
> >>>>>>>> + static int cpus_stopped;
> >>>>>>>> + unsigned long timeout;
> >>>>>>>> + unsigned int ncpus;
> >>>>>>>> +
> >>>>>>>> + /*
> >>>>>>>> + * This function can be called twice in panic path, but obviously
> >>>>>>>> + * we execute this only once.
> >>>>>>>> + */
> >>>>>>>> + if (cpus_stopped)
> >>>>>>>> + return;
> >>>>>>>> +
> >>>>>>>> + cpus_stopped = 1;
> >>>>>>>> +
> >>>>>>>> + /* Excluding the panic cpu */
> >>>>>>>> + ncpus = num_online_cpus() - 1;
> >>>>>>>> +
> >>>>>>>> + smp_call_function(crash_shutdown_secondary, NULL, 0);
> >>>>>>>> + smp_wmb();
> >>>>>>>> +
> >>>>>>>> + /*
> >>>>>>>> + * The crash CPU sends an IPI and wait for other CPUs to
> >>>>>>>> + * respond. Delay of at least 10 seconds.
> >>>>>>>> + */
> >>>>>>>> + pr_emerg("Sending IPI to other cpus...\n");
> >>>>>>>> + timeout = USEC_PER_SEC * 10;
> >>>>>>>> + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
> >>>>>>>> + cpu_relax();
> >>>>>>>> + udelay(1);
> >>>>>>>> + }
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +#endif
> >>>>>>>> +
> >>>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>>>> +{
> >>>>>>>> + int crashing_cpu;
> >>>>>>>> +
> >>>>>>>> + local_irq_disable();
> >>>>>>>> +
> >>>>>>>> + crashing_cpu = smp_processor_id();
> >>>>>>>> + crash_save_cpu(regs, crashing_cpu);
> >>>>>>>> +
> >>>>>>>> + /* shutdown non-crashing cpus */
> >>>>>>>> + crash_smp_send_stop();
> >>>>>>>> + cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
> >>>>>>>> +
> >>>>>>>> + pr_info("Starting crashdump kernel...\n");
> >>>>>>>> +}
> >>>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
> >>>>>>>> new file mode 100644
> >>>>>>>> index 000000000000..13e5d2f7870d
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
> >>>>>>>> @@ -0,0 +1,19 @@
> >>>>>>>> +// SPDX-License-Identifier: GPL-2.0
> >>>>>>>> +#include <linux/highmem.h>
> >>>>>>>> +#include <linux/crash_dump.h>
> >>>>>>>> +#include <linux/io.h>
> >>>>>>>> +
> >>>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
> >>>>>>>> + size_t csize, unsigned long offset)
> >>>>>>>> +{
> >>>>>>>> + void *vaddr;
> >>>>>>>> +
> >>>>>>>> + if (!csize)
> >>>>>>>> + return 0;
> >>>>>>>> +
> >>>>>>>> + vaddr = kmap_local_pfn(pfn);
> >>>>>>>> + csize = copy_to_iter(vaddr + offset, csize, iter);
> >>>>>>>> + kunmap_local(vaddr);
> >>>>>>>> +
> >>>>>>>> + return csize;
> >>>>>>>> +}
> >>>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
> >>>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
> >>>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
> >>>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
> >>>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
> >>>>>>>> continue;
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> - /* kexec need a safe page to save reboot_code_buffer */
> >>>>>>>> + /* kexec/kdump need a safe page to save reboot_code_buffer */
> >>>>>>>> kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
> >>>>>>>>
> >>>>>>>> reboot_code_buffer =
> >>>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
> >>>>>>>>
> >>>>>>>> kexec_reboot();
> >>>>>>>> }
> >>>>>>>> -
> >>>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
> >>>>>>>> -{
> >>>>>>>> -}
> >>>>>>>> #endif
> >>>>>>>>
> >>>>>>>> void machine_shutdown(void)
> >>>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>>>
> >>>>>>>> jump_addr = (unsigned long)phys_to_virt(image->start);
> >>>>>>>>
> >>>>>>>> - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>>>> + if (image->type == KEXEC_TYPE_DEFAULT)
> >>>>>>>> + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> >>>>>>>>
> >>>>>>>> /*
> >>>>>>>> * The generic kexec code builds a page list with physical
> >>>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
> >>>>>>>>
> >>>>>>>> /*
> >>>>>>>> * We know we were online, and there will be no incoming IPIs at
> >>>>>>>> - * this point.
> >>>>>>>> + * this point. Mark online again before rebooting so that the crash
> >>>>>>>> + * analysis tool will see us correctly.
> >>>>>>>> */
> >>>>>>>> set_cpu_online(smp_processor_id(), true);
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
> >>>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
> >>>>>>>> --- a/arch/loongarch/kernel/mem.c
> >>>>>>>> +++ b/arch/loongarch/kernel/mem.c
> >>>>>>>> @@ -5,6 +5,7 @@
> >>>>>>>> #include <linux/efi.h>
> >>>>>>>> #include <linux/initrd.h>
> >>>>>>>> #include <linux/memblock.h>
> >>>>>>>> +#include <linux/of_fdt.h>
> >>>>>>>>
> >>>>>>>> #include <asm/bootinfo.h>
> >>>>>>>> #include <asm/loongson.h>
> >>>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
> >>>>>>>>
> >>>>>>>> /* Reserve the initrd */
> >>>>>>>> reserve_initrd_mem();
> >>>>>>>> +
> >>>>>>>> + /* Mainly reserved memory for the elf core head */
> >>>>>>>> + early_init_fdt_scan_reserved_mem();
> >>>>>>>> + /* Parse linux,usable-memory-range is for crash dump kernel */
> >>>>>>>> + early_init_dt_check_for_usable_mem_range();
> >>>>>>>> }
> >>>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
> >>>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
> >>>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
> >>>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
> >>>>>>>> move s2, a2
> >>>>>>>> move s3, a3
> >>>>>>>>
> >>>>>>>> + /*
> >>>>>>>> + * In case of a kdump/crash kernel, the indirection page is not
> >>>>>>>> + * populated as the kernel is directly copied to a reserved location
> >>>>>>>> + */
> >>>>>>>> + beqz s2, done
> >>>>>>>> +
> >>>>>>>> process_entry:
> >>>>>>>> PTR_L s4, s2, 0
> >>>>>>>> PTR_ADDI s2, s2, SZREG
> >>>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> >>>>>>>> index f938aae3e92c..ea34b77e402f 100644
> >>>>>>>> --- a/arch/loongarch/kernel/setup.c
> >>>>>>>> +++ b/arch/loongarch/kernel/setup.c
> >>>>>>>> @@ -19,6 +19,8 @@
> >>>>>>>> #include <linux/memblock.h>
> >>>>>>>> #include <linux/initrd.h>
> >>>>>>>> #include <linux/ioport.h>
> >>>>>>>> +#include <linux/kexec.h>
> >>>>>>>> +#include <linux/crash_dump.h>
> >>>>>>>> #include <linux/root_dev.h>
> >>>>>>>> #include <linux/console.h>
> >>>>>>>> #include <linux/pfn.h>
> >>>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
> >>>>>>>> }
> >>>>>>>> early_param("mem", early_parse_mem);
> >>>>>>>>
> >>>>>>>> +static void __init loongarch_parse_crashkernel(void)
> >>>>>>>> +{
> >>>>>>>> +#ifdef CONFIG_KEXEC
> >>>>>>>> + unsigned long long start;
> >>>>>>>> + unsigned long long total_mem;
> >>>>>>>> + unsigned long long crash_size, crash_base;
> >>>>>>>> + int ret;
> >>>>>>>> +
> >>>>>>>> + total_mem = memblock_phys_mem_size();
> >>>>>>>> + ret = parse_crashkernel(boot_command_line, total_mem,
> >>>>>>>> + &crash_size, &crash_base);
> >>>>>>>> + if (ret != 0 || crash_size <= 0)
> >>>>>>>> + return;
> >>>>>>>> +
> >>>>>>>> +
> >>>>>>>> + start = memblock_phys_alloc_range(crash_size, 1, crash_base,
> >>>>>>>> + crash_base + crash_size);
> >>>>>>>> + if (start != crash_base) {
> >>>>>>>> + pr_warn("Invalid memory region reserved for crash kernel\n");
> >>>>>>>> + return;
> >>>>>>>> + }
> >>>>>>>> +
> >>>>>>>> + crashk_res.start = crash_base;
> >>>>>>>> + crashk_res.end = crash_base + crash_size - 1;
> >>>>>>>> +#endif
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static void __init request_crashkernel(struct resource *res)
> >>>>>>>> +{
> >>>>>>>> +#ifdef CONFIG_KEXEC
> >>>>>>>> + int ret;
> >>>>>>>> +
> >>>>>>>> + if (crashk_res.start == crashk_res.end)
> >>>>>>>> + return;
> >>>>>>>> +
> >>>>>>>> + ret = request_resource(res, &crashk_res);
> >>>>>>>> + if (!ret)
> >>>>>>>> + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
> >>>>>>>> + (unsigned long)((crashk_res.end -
> >>>>>>>> + crashk_res.start + 1) >> 20),
> >>>>>>>> + (unsigned long)(crashk_res.start >> 20));
> >>>>>>>> +#endif
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> void __init platform_init(void)
> >>>>>>>> {
> >>>>>>>> efi_init();
> >>>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
> >>>>>>>>
> >>>>>>>> check_kernel_sections_mem();
> >>>>>>>>
> >>>>>>>> + loongarch_parse_crashkernel();
> >>>>>>>> +
> >>>>>>>> /*
> >>>>>>>> * In order to reduce the possibility of kernel panic when failed to
> >>>>>>>> * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
> >>>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
> >>>>>>>> request_resource(res, &code_resource);
> >>>>>>>> request_resource(res, &data_resource);
> >>>>>>>> request_resource(res, &bss_resource);
> >>>>>>>> + request_crashkernel(res);
> >>>>>>>> }
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> >>>>>>>> index aa1c95aaf595..0e610872f3f4 100644
> >>>>>>>> --- a/arch/loongarch/kernel/traps.c
> >>>>>>>> +++ b/arch/loongarch/kernel/traps.c
> >>>>>>>> @@ -10,6 +10,7 @@
> >>>>>>>> #include <linux/entry-common.h>
> >>>>>>>> #include <linux/init.h>
> >>>>>>>> #include <linux/kernel.h>
> >>>>>>>> +#include <linux/kexec.h>
> >>>>>>>> #include <linux/module.h>
> >>>>>>>> #include <linux/extable.h>
> >>>>>>>> #include <linux/mm.h>
> >>>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
> >>>>>>>>
> >>>>>>>> oops_exit();
> >>>>>>>>
> >>>>>>>> + if (regs && kexec_should_crash(current))
> >>>>>>>> + crash_kexec(regs);
> >>>>>>>> +
> >>>>>>>> if (in_interrupt())
> >>>>>>>> panic("Fatal exception in interrupt");
> >>>>>>>>
> >>>>>>>> --
> >>>>>>>> 2.36.0
> >>>>>>>>
> >>>>>>
> >>>>
> >>
>
Hi, Huacai
On 09/05/2022 09:01 PM, Huacai Chen wrote:
> Hi, Youling,
>
> On Mon, Sep 5, 2022 at 3:45 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>
>> Hi, Huacai
>>
>> On 09/05/2022 03:32 PM, Huacai Chen wrote:
>>> Hi, Youling,
>>>
>>> On Mon, Sep 5, 2022 at 10:22 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>
>>>>
>>>>
>>>> On 09/05/2022 10:14 AM, Huacai Chen wrote:
>>>>> On Mon, Sep 5, 2022 at 10:04 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>
>>>>>> Hi, Huacai
>>>>>>
>>>>>> On 09/05/2022 09:38 AM, Huacai Chen wrote:
>>>>>>> Hi, Youling,
>>>>>>>
>>>>>>> On Mon, Sep 5, 2022 at 8:54 AM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>>>
>>>>>>>> Hi, Huacai
>>>>>>>>
>>>>>>>> On 09/04/2022 08:21 PM, Huacai Chen wrote:
>>>>>>>>> Hi, Youling,
>>>>>>>>>
>>>>>>>>> I think crash.c can be merged into crash_dump.c
>>>>>>>>
>>>>>>>> Most architectures only implement copy_oldmem_page() in crash_dump.c,
>>>>>>>> I'm not sure if merging crash.c into crash_dump.c will break its
>>>>>>>> consistency?
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> Youling
>>>>>>> Yes, you are right, crash.c cannot be merged into crash_dump.c, but it
>>>>>>> can be merged into machine_kexec.c, as arm64 and riscv do.
>>>>>>
>>>>>> For arm64, machine_crash_shutdown() is placed in machine_kexec.c, and
>>>>>> crash_smp_send_stop is placed in smp.c. If crash.c needs to be merged
>>>>>> into machine_kexec.c, should crash_shutdown_secondary and
>>>>>> crash_smp_send_stop be placed in smp.c?
>>>>> I don't want to touch smp.c, all merged into machine_kexec.c seems reasonable.
>>>>
>>>> Ok, I'll merge all into machine_kexec.c.
>>>>
>>>> Youling.
>>> Another problem, 0x9000000091000000 for PHYSICAL_START is too tricky.
>>> If you want to skip the "low memory", maybe we can use
>>> 0x9000000090000000 or 0x90000000a0000000?
>>
>> Because there are many holes in our memory layout, if PHYSICAL_START is
>> set to 0x90000000a0000000, the largest reserved area of the crashkernel
>> will be 512M, beyond which it will fail.
> Then 0x9000000090000000 is not suitable, but I think 512M is enough?
> If so, let's use 0x90000000a0000000.
I'm not sure if it's enough for the server machine?
I will change to 0x90000000a0000000.
Youling.
>
> Huacai
>
> Huacai
>>
>> # cat /proc/iomem
>> 90400000-bfffffff : System RAM
>> c0020000-f9efffff : System RAM
>> f6810000-f6813fff : Reserved
>>
>> The second System RAM starts at 0x90400000, so 0x9000000090000000 will
>> be too small.
>>
>> Youling.
>>
>>>
>>> Huacai
>>>>
>>>>>
>>>>> Huacai
>>>>>>
>>>>>> Youling.
>>>>>>>
>>>>>>> Huacai
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Huacai
>>>>>>>>>
>>>>>>>>> On Mon, Aug 29, 2022 at 12:37 PM Youling Tang <tangyouling@loongson.cn> wrote:
>>>>>>>>>>
>>>>>>>>>> This patch adds support for kdump: the kernel will reserve a region
>>>>>>>>>> for the crash kernel and jump there on panic.
>>>>>>>>>>
>>>>>>>>>> Arch-specific functions are added to allow for implementing a crash
>>>>>>>>>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>>>>>>>>>>
>>>>>>>>>> A user space tool, like kexec-tools, is responsible for allocating a
>>>>>>>>>> separate region for the core's ELF header within crash kdump kernel
>>>>>>>>>> memory and filling it in when executing kexec_load().
>>>>>>>>>>
>>>>>>>>>> Then, its location will be advertised to crash dump kernel via a new
>>>>>>>>>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>>>>>>>>>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>>>>>>>>>
>>>>>>>>>> At the same time, it will also limit the crash kdump kernel to the
>>>>>>>>>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>>>>>>>>>> so as not to destroy the original kernel dump data.
>>>>>>>>>>
>>>>>>>>>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>>>>>>>>>> with copy_oldmem_page().
>>>>>>>>>>
>>>>>>>>>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>>>>>>>>>> suggested crashkernel parameter is "crashkernel=512M@2320M"). You may test
>>>>>>>>>> it by triggering a crash through /proc/sysrq-trigger:
>>>>>>>>>>
>>>>>>>>>> $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>>>>>>>>> # echo c > /proc/sysrq-trigger
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
>>>>>>>>>> ---
>>>>>>>>>> arch/loongarch/Kconfig | 22 ++++++
>>>>>>>>>> arch/loongarch/Makefile | 4 +
>>>>>>>>>> arch/loongarch/kernel/Makefile | 3 +-
>>>>>>>>>> arch/loongarch/kernel/crash.c | 100 ++++++++++++++++++++++++
>>>>>>>>>> arch/loongarch/kernel/crash_dump.c | 19 +++++
>>>>>>>>>> arch/loongarch/kernel/machine_kexec.c | 12 ++-
>>>>>>>>>> arch/loongarch/kernel/mem.c | 6 ++
>>>>>>>>>> arch/loongarch/kernel/relocate_kernel.S | 6 ++
>>>>>>>>>> arch/loongarch/kernel/setup.c | 49 ++++++++++++
>>>>>>>>>> arch/loongarch/kernel/traps.c | 4 +
>>>>>>>>>> 10 files changed, 217 insertions(+), 8 deletions(-)
>>>>>>>>>> create mode 100644 arch/loongarch/kernel/crash.c
>>>>>>>>>> create mode 100644 arch/loongarch/kernel/crash_dump.c
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>>>>>>>> index 903c82fa958d..7c1b07a5b5bd 100644
>>>>>>>>>> --- a/arch/loongarch/Kconfig
>>>>>>>>>> +++ b/arch/loongarch/Kconfig
>>>>>>>>>> @@ -420,6 +420,28 @@ config KEXEC
>>>>>>>>>>
>>>>>>>>>> The name comes from the similarity to the exec system call.
>>>>>>>>>>
>>>>>>>>>> +config CRASH_DUMP
>>>>>>>>>> + bool "Build kdump crash kernel"
>>>>>>>>>> + help
>>>>>>>>>> + Generate crash dump after being started by kexec. This should
>>>>>>>>>> + be normally only set in special crash dump kernels which are
>>>>>>>>>> + loaded in the main kernel with kexec-tools into a specially
>>>>>>>>>> + reserved region and then later executed after a crash by
>>>>>>>>>> + kdump/kexec.
>>>>>>>>>> +
>>>>>>>>>> + For more details see Documentation/admin-guide/kdump/kdump.rst
>>>>>>>>>> +
>>>>>>>>>> +config PHYSICAL_START
>>>>>>>>>> + hex "Physical address where the kernel is loaded"
>>>>>>>>>> + default "0x9000000091000000" if 64BIT
>>>>>>>>>> + depends on CRASH_DUMP
>>>>>>>>>> + help
>>>>>>>>>> + This gives the XKPRANGE address where the kernel is loaded.
>>>>>>>>>> + If you plan to use kernel for capturing the crash dump change
>>>>>>>>>> + this value to start of the reserved region (the "X" value as
>>>>>>>>>> + specified in the "crashkernel=YM@XM" command line boot parameter
>>>>>>>>>> + passed to the panic-ed kernel).
>>>>>>>>>> +
>>>>>>>>>> config SECCOMP
>>>>>>>>>> bool "Enable seccomp to safely compute untrusted bytecode"
>>>>>>>>>> depends on PROC_FS
>>>>>>>>>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>>>>>>>>>> index 4bc47f47cfd8..7dabd580426d 100644
>>>>>>>>>> --- a/arch/loongarch/Makefile
>>>>>>>>>> +++ b/arch/loongarch/Makefile
>>>>>>>>>> @@ -48,7 +48,11 @@ KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>>>>>>>>>> cflags-y += -ffreestanding
>>>>>>>>>> cflags-y += $(call cc-option, -mno-check-zero-division)
>>>>>>>>>>
>>>>>>>>>> +ifdef CONFIG_PHYSICAL_START
>>>>>>>>>> +load-y = $(CONFIG_PHYSICAL_START)
>>>>>>>>>> +else
>>>>>>>>>> load-y = 0x9000000000200000
>>>>>>>>>> +endif
>>>>>>>>>> bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
>>>>>>>>>>
>>>>>>>>>> drivers-$(CONFIG_PCI) += arch/loongarch/pci/
>>>>>>>>>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>>>>>>>>>> index 20b64ac3f128..df5aea129364 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/Makefile
>>>>>>>>>> +++ b/arch/loongarch/kernel/Makefile
>>>>>>>>>> @@ -17,7 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
>>>>>>>>>> obj-$(CONFIG_MODULES) += module.o module-sections.o
>>>>>>>>>> obj-$(CONFIG_STACKTRACE) += stacktrace.o
>>>>>>>>>>
>>>>>>>>>> -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
>>>>>>>>>> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
>>>>>>>>>> +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
>>>>>>>>>>
>>>>>>>>>> obj-$(CONFIG_PROC_FS) += proc.o
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c
>>>>>>>>>> new file mode 100644
>>>>>>>>>> index 000000000000..b4f249ec6301
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/arch/loongarch/kernel/crash.c
>>>>>>>>>> @@ -0,0 +1,100 @@
>>>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>>>> +/*
>>>>>>>>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>>>>>>>>> + *
>>>>>>>>>> + * Derived from MIPS
>>>>>>>>>> + */
>>>>>>>>>> +#include <linux/kernel.h>
>>>>>>>>>> +#include <linux/smp.h>
>>>>>>>>>> +#include <linux/reboot.h>
>>>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>>> +#include <linux/delay.h>
>>>>>>>>>> +#include <linux/irq.h>
>>>>>>>>>> +#include <linux/types.h>
>>>>>>>>>> +#include <linux/sched.h>
>>>>>>>>>> +#include <linux/sched/task_stack.h>
>>>>>>>>>> +#include <asm/cacheflush.h>
>>>>>>>>>> +#include <asm/kexec.h>
>>>>>>>>>> +
>>>>>>>>>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>>>>>>>>> +
>>>>>>>>>> +#ifdef CONFIG_SMP
>>>>>>>>>> +static void crash_shutdown_secondary(void *passed_regs)
>>>>>>>>>> +{
>>>>>>>>>> + struct pt_regs *regs = passed_regs;
>>>>>>>>>> + int cpu = smp_processor_id();
>>>>>>>>>> +
>>>>>>>>>> + /*
>>>>>>>>>> + * If we are passed registers, use those. Otherwise get the
>>>>>>>>>> + * regs from the last interrupt, which should be correct, as
>>>>>>>>>> + * we are in an interrupt. But if the regs are not there,
>>>>>>>>>> + * pull them from the top of the stack. They are probably
>>>>>>>>>> + * wrong, but we need something to keep from crashing again.
>>>>>>>>>> + */
>>>>>>>>>> + if (!regs)
>>>>>>>>>> + regs = get_irq_regs();
>>>>>>>>>> + if (!regs)
>>>>>>>>>> + regs = task_pt_regs(current);
>>>>>>>>>> +
>>>>>>>>>> + local_irq_disable();
>>>>>>>>>> + if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>>>>>>>>>> + crash_save_cpu(regs, cpu);
>>>>>>>>>> + cpumask_set_cpu(cpu, &cpus_in_crash);
>>>>>>>>>> +
>>>>>>>>>> + while (!atomic_read(&kexec_ready_to_reboot))
>>>>>>>>>> + cpu_relax();
>>>>>>>>>> +
>>>>>>>>>> + kexec_reboot();
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +/* Override the weak function in kernel/panic.c */
>>>>>>>>>> +void crash_smp_send_stop(void)
>>>>>>>>>> +{
>>>>>>>>>> + static int cpus_stopped;
>>>>>>>>>> + unsigned long timeout;
>>>>>>>>>> + unsigned int ncpus;
>>>>>>>>>> +
>>>>>>>>>> + /*
>>>>>>>>>> + * This function can be called twice in panic path, but obviously
>>>>>>>>>> + * we execute this only once.
>>>>>>>>>> + */
>>>>>>>>>> + if (cpus_stopped)
>>>>>>>>>> + return;
>>>>>>>>>> +
>>>>>>>>>> + cpus_stopped = 1;
>>>>>>>>>> +
>>>>>>>>>> + /* Excluding the panic cpu */
>>>>>>>>>> + ncpus = num_online_cpus() - 1;
>>>>>>>>>> +
>>>>>>>>>> + smp_call_function(crash_shutdown_secondary, NULL, 0);
>>>>>>>>>> + smp_wmb();
>>>>>>>>>> +
>>>>>>>>>> + /*
>>>>>>>>>> + * The crash CPU sends an IPI and wait for other CPUs to
>>>>>>>>>> + * respond. Delay of at least 10 seconds.
>>>>>>>>>> + */
>>>>>>>>>> + pr_emerg("Sending IPI to other cpus...\n");
>>>>>>>>>> + timeout = USEC_PER_SEC * 10;
>>>>>>>>>> + while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>>>>>>>>>> + cpu_relax();
>>>>>>>>>> + udelay(1);
>>>>>>>>>> + }
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +#endif
>>>>>>>>>> +
>>>>>>>>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>>>> +{
>>>>>>>>>> + int crashing_cpu;
>>>>>>>>>> +
>>>>>>>>>> + local_irq_disable();
>>>>>>>>>> +
>>>>>>>>>> + crashing_cpu = smp_processor_id();
>>>>>>>>>> + crash_save_cpu(regs, crashing_cpu);
>>>>>>>>>> +
>>>>>>>>>> + /* shutdown non-crashing cpus */
>>>>>>>>>> + crash_smp_send_stop();
>>>>>>>>>> + cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>>>>>>>>>> +
>>>>>>>>>> + pr_info("Starting crashdump kernel...\n");
>>>>>>>>>> +}
>>>>>>>>>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>>>>>>>>>> new file mode 100644
>>>>>>>>>> index 000000000000..13e5d2f7870d
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/arch/loongarch/kernel/crash_dump.c
>>>>>>>>>> @@ -0,0 +1,19 @@
>>>>>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>>>>>> +#include <linux/highmem.h>
>>>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>>> +#include <linux/io.h>
>>>>>>>>>> +
>>>>>>>>>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>>>>>>>>>> + size_t csize, unsigned long offset)
>>>>>>>>>> +{
>>>>>>>>>> + void *vaddr;
>>>>>>>>>> +
>>>>>>>>>> + if (!csize)
>>>>>>>>>> + return 0;
>>>>>>>>>> +
>>>>>>>>>> + vaddr = kmap_local_pfn(pfn);
>>>>>>>>>> + csize = copy_to_iter(vaddr + offset, csize, iter);
>>>>>>>>>> + kunmap_local(vaddr);
>>>>>>>>>> +
>>>>>>>>>> + return csize;
>>>>>>>>>> +}
>>>>>>>>>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>>>> index 4ffcd4cd9c8c..f793a3ff09a3 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/machine_kexec.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>>>>>>>>> @@ -69,7 +69,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>>>>>>>>> continue;
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> - /* kexec need a safe page to save reboot_code_buffer */
>>>>>>>>>> + /* kexec/kdump need a safe page to save reboot_code_buffer */
>>>>>>>>>> kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>>>>>>>>>
>>>>>>>>>> reboot_code_buffer =
>>>>>>>>>> @@ -113,10 +113,6 @@ static void kexec_shutdown_secondary(void *)
>>>>>>>>>>
>>>>>>>>>> kexec_reboot();
>>>>>>>>>> }
>>>>>>>>>> -
>>>>>>>>>> -void machine_crash_shutdown(struct pt_regs *regs)
>>>>>>>>>> -{
>>>>>>>>>> -}
>>>>>>>>>> #endif
>>>>>>>>>>
>>>>>>>>>> void machine_shutdown(void)
>>>>>>>>>> @@ -135,7 +131,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>>>
>>>>>>>>>> jump_addr = (unsigned long)phys_to_virt(image->start);
>>>>>>>>>>
>>>>>>>>>> - first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>>>> + if (image->type == KEXEC_TYPE_DEFAULT)
>>>>>>>>>> + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>>>>>>>>>>
>>>>>>>>>> /*
>>>>>>>>>> * The generic kexec code builds a page list with physical
>>>>>>>>>> @@ -167,7 +164,8 @@ void machine_kexec(struct kimage *image)
>>>>>>>>>>
>>>>>>>>>> /*
>>>>>>>>>> * We know we were online, and there will be no incoming IPIs at
>>>>>>>>>> - * this point.
>>>>>>>>>> + * this point. Mark online again before rebooting so that the crash
>>>>>>>>>> + * analysis tool will see us correctly.
>>>>>>>>>> */
>>>>>>>>>> set_cpu_online(smp_processor_id(), true);
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>>>>>>>>>> index 7423361b0ebc..c6def6ff81c8 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/mem.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/mem.c
>>>>>>>>>> @@ -5,6 +5,7 @@
>>>>>>>>>> #include <linux/efi.h>
>>>>>>>>>> #include <linux/initrd.h>
>>>>>>>>>> #include <linux/memblock.h>
>>>>>>>>>> +#include <linux/of_fdt.h>
>>>>>>>>>>
>>>>>>>>>> #include <asm/bootinfo.h>
>>>>>>>>>> #include <asm/loongson.h>
>>>>>>>>>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>>>>>>>>>
>>>>>>>>>> /* Reserve the initrd */
>>>>>>>>>> reserve_initrd_mem();
>>>>>>>>>> +
>>>>>>>>>> + /* Mainly reserved memory for the elf core head */
>>>>>>>>>> + early_init_fdt_scan_reserved_mem();
>>>>>>>>>> + /* Parse linux,usable-memory-range is for crash dump kernel */
>>>>>>>>>> + early_init_dt_check_for_usable_mem_range();
>>>>>>>>>> }
>>>>>>>>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>>>> index d1f242f74ea8..4ee5ac4ac2d7 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>>>>>>>>> @@ -28,6 +28,12 @@ SYM_CODE_START(relocate_new_kernel)
>>>>>>>>>> move s2, a2
>>>>>>>>>> move s3, a3
>>>>>>>>>>
>>>>>>>>>> + /*
>>>>>>>>>> + * In case of a kdump/crash kernel, the indirection page is not
>>>>>>>>>> + * populated as the kernel is directly copied to a reserved location
>>>>>>>>>> + */
>>>>>>>>>> + beqz s2, done
>>>>>>>>>> +
>>>>>>>>>> process_entry:
>>>>>>>>>> PTR_L s4, s2, 0
>>>>>>>>>> PTR_ADDI s2, s2, SZREG
>>>>>>>>>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>>>>>>>>>> index f938aae3e92c..ea34b77e402f 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/setup.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/setup.c
>>>>>>>>>> @@ -19,6 +19,8 @@
>>>>>>>>>> #include <linux/memblock.h>
>>>>>>>>>> #include <linux/initrd.h>
>>>>>>>>>> #include <linux/ioport.h>
>>>>>>>>>> +#include <linux/kexec.h>
>>>>>>>>>> +#include <linux/crash_dump.h>
>>>>>>>>>> #include <linux/root_dev.h>
>>>>>>>>>> #include <linux/console.h>
>>>>>>>>>> #include <linux/pfn.h>
>>>>>>>>>> @@ -186,6 +188,50 @@ static int __init early_parse_mem(char *p)
>>>>>>>>>> }
>>>>>>>>>> early_param("mem", early_parse_mem);
>>>>>>>>>>
>>>>>>>>>> +static void __init loongarch_parse_crashkernel(void)
>>>>>>>>>> +{
>>>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>>>> + unsigned long long start;
>>>>>>>>>> + unsigned long long total_mem;
>>>>>>>>>> + unsigned long long crash_size, crash_base;
>>>>>>>>>> + int ret;
>>>>>>>>>> +
>>>>>>>>>> + total_mem = memblock_phys_mem_size();
>>>>>>>>>> + ret = parse_crashkernel(boot_command_line, total_mem,
>>>>>>>>>> + &crash_size, &crash_base);
>>>>>>>>>> + if (ret != 0 || crash_size <= 0)
>>>>>>>>>> + return;
>>>>>>>>>> +
>>>>>>>>>> +
>>>>>>>>>> + start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>>>>>>>>>> + crash_base + crash_size);
>>>>>>>>>> + if (start != crash_base) {
>>>>>>>>>> + pr_warn("Invalid memory region reserved for crash kernel\n");
>>>>>>>>>> + return;
>>>>>>>>>> + }
>>>>>>>>>> +
>>>>>>>>>> + crashk_res.start = crash_base;
>>>>>>>>>> + crashk_res.end = crash_base + crash_size - 1;
>>>>>>>>>> +#endif
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static void __init request_crashkernel(struct resource *res)
>>>>>>>>>> +{
>>>>>>>>>> +#ifdef CONFIG_KEXEC
>>>>>>>>>> + int ret;
>>>>>>>>>> +
>>>>>>>>>> + if (crashk_res.start == crashk_res.end)
>>>>>>>>>> + return;
>>>>>>>>>> +
>>>>>>>>>> + ret = request_resource(res, &crashk_res);
>>>>>>>>>> + if (!ret)
>>>>>>>>>> + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>>>>>>>>>> + (unsigned long)((crashk_res.end -
>>>>>>>>>> + crashk_res.start + 1) >> 20),
>>>>>>>>>> + (unsigned long)(crashk_res.start >> 20));
>>>>>>>>>> +#endif
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> void __init platform_init(void)
>>>>>>>>>> {
>>>>>>>>>> efi_init();
>>>>>>>>>> @@ -229,6 +275,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>>>>>>>>>
>>>>>>>>>> check_kernel_sections_mem();
>>>>>>>>>>
>>>>>>>>>> + loongarch_parse_crashkernel();
>>>>>>>>>> +
>>>>>>>>>> /*
>>>>>>>>>> * In order to reduce the possibility of kernel panic when failed to
>>>>>>>>>> * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>>>>>>>>>> @@ -290,6 +338,7 @@ static void __init resource_init(void)
>>>>>>>>>> request_resource(res, &code_resource);
>>>>>>>>>> request_resource(res, &data_resource);
>>>>>>>>>> request_resource(res, &bss_resource);
>>>>>>>>>> + request_crashkernel(res);
>>>>>>>>>> }
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>>>>>>>>>> index aa1c95aaf595..0e610872f3f4 100644
>>>>>>>>>> --- a/arch/loongarch/kernel/traps.c
>>>>>>>>>> +++ b/arch/loongarch/kernel/traps.c
>>>>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>>>> #include <linux/entry-common.h>
>>>>>>>>>> #include <linux/init.h>
>>>>>>>>>> #include <linux/kernel.h>
>>>>>>>>>> +#include <linux/kexec.h>
>>>>>>>>>> #include <linux/module.h>
>>>>>>>>>> #include <linux/extable.h>
>>>>>>>>>> #include <linux/mm.h>
>>>>>>>>>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>>>>>>>>>
>>>>>>>>>> oops_exit();
>>>>>>>>>>
>>>>>>>>>> + if (regs && kexec_should_crash(current))
>>>>>>>>>> + crash_kexec(regs);
>>>>>>>>>> +
>>>>>>>>>> if (in_interrupt())
>>>>>>>>>> panic("Fatal exception in interrupt");
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>> 2.36.0
>>>>>>>>>>
>>>>>>>>
>>>>>>
>>>>
>>
© 2016 - 2026 Red Hat, Inc.