[PATCH v5 8/8] LoongArch: Add ORC stack unwinder support

Tiezhu Yang posted 8 patches 2 years ago
[PATCH v5 8/8] LoongArch: Add ORC stack unwinder support
Posted by Tiezhu Yang 2 years ago
The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
similar in concept to a DWARF unwinder. The difference is that the format
of the ORC data is much simpler than DWARF, which in turn allows the ORC
unwinder to be much simpler and faster.

The ORC data consists of unwind tables which are generated by objtool.
After analyzing all the code paths of a .o file, it determines information
about the stack state at each instruction address in the file and outputs
that information to the .orc_unwind and .orc_unwind_ip sections.

The per-object ORC sections are combined at link time and are sorted and
post-processed at boot time. The unwinder uses the resulting data to
correlate instruction addresses with their stack states at run time.

Most of the logic are similar with x86, in order to get ra info before ra
is saved into stack, add ra_reg and ra_offset into orc_entry. At the same
time, modify some arch-specific code to silence the objtool warnings.

Co-developed-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Jinyang He <hejinyang@loongson.cn>
Co-developed-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
---
 arch/loongarch/Kconfig                    |   2 +
 arch/loongarch/Kconfig.debug              |  11 +
 arch/loongarch/Makefile                   |  19 +
 arch/loongarch/include/asm/Kbuild         |   2 +
 arch/loongarch/include/asm/bug.h          |   1 +
 arch/loongarch/include/asm/exception.h    |   2 +
 arch/loongarch/include/asm/module.h       |   7 +
 arch/loongarch/include/asm/orc_header.h   |  18 +
 arch/loongarch/include/asm/orc_lookup.h   |  31 ++
 arch/loongarch/include/asm/orc_types.h    |  58 +++
 arch/loongarch/include/asm/stackframe.h   |   3 +
 arch/loongarch/include/asm/unwind.h       |  19 +-
 arch/loongarch/include/asm/unwind_hints.h |  28 ++
 arch/loongarch/kernel/Makefile            |   4 +
 arch/loongarch/kernel/entry.S             |   6 +-
 arch/loongarch/kernel/fpu.S               |   7 +
 arch/loongarch/kernel/genex.S             |   6 +-
 arch/loongarch/kernel/lbt.S               |   5 +
 arch/loongarch/kernel/module.c            |  22 +-
 arch/loongarch/kernel/relocate_kernel.S   |   2 +
 arch/loongarch/kernel/setup.c             |   2 +
 arch/loongarch/kernel/stacktrace.c        |   1 +
 arch/loongarch/kernel/traps.c             |  42 +-
 arch/loongarch/kernel/unwind_orc.c        | 516 ++++++++++++++++++++++
 arch/loongarch/kernel/vmlinux.lds.S       |   3 +
 arch/loongarch/kvm/switch.S               |   7 +-
 arch/loongarch/lib/Makefile               |   2 +
 arch/loongarch/mm/tlb.c                   |  27 +-
 arch/loongarch/mm/tlbex.S                 |   9 +
 arch/loongarch/vdso/Makefile              |   1 +
 include/linux/compiler.h                  |   9 +
 scripts/Makefile                          |   7 +-
 32 files changed, 839 insertions(+), 40 deletions(-)
 create mode 100644 arch/loongarch/include/asm/orc_header.h
 create mode 100644 arch/loongarch/include/asm/orc_lookup.h
 create mode 100644 arch/loongarch/include/asm/orc_types.h
 create mode 100644 arch/loongarch/include/asm/unwind_hints.h
 create mode 100644 arch/loongarch/kernel/unwind_orc.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ee123820a476..eea57378646d 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -132,6 +132,7 @@ config LOONGARCH
 	select HAVE_KVM
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
+	select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS
 	select HAVE_PCI
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
@@ -143,6 +144,7 @@ config LOONGARCH
 	select HAVE_SAMPLE_FTRACE_DIRECT
 	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
 	select HAVE_SETUP_PER_CPU_AREA if NUMA
+	select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_TIF_NOHZ
diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug
index 8d36aab53008..98d60630c3d4 100644
--- a/arch/loongarch/Kconfig.debug
+++ b/arch/loongarch/Kconfig.debug
@@ -26,4 +26,15 @@ config UNWINDER_PROLOGUE
 	  Some of the addresses it reports may be incorrect (but better than the
 	  Guess unwinder).
 
+config UNWINDER_ORC
+	bool "ORC unwinder"
+	select OBJTOOL
+	help
+	  This option enables the ORC (Oops Rewind Capability) unwinder for
+	  unwinding kernel stack traces.  It uses a custom data format which is
+	  a simplified version of the DWARF Call Frame Information standard.
+
+	  Enabling this option will increase the kernel's runtime memory usage
+	  by roughly 2-4MB, depending on your kernel config.
+
 endchoice
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 204b94b2e6aa..1f0e41f8ab61 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -25,6 +25,18 @@ endif
 32bit-emul		= elf32loongarch
 64bit-emul		= elf64loongarch
 
+ifdef CONFIG_UNWINDER_ORC
+orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h
+orc_hash_sh := $(srctree)/scripts/orc_hash.sh
+targets += $(orc_hash_h)
+quiet_cmd_orc_hash = GEN     $@
+      cmd_orc_hash = mkdir -p $(dir $@); \
+		     $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@
+$(orc_hash_h): $(srctree)/arch/loongarch/include/asm/orc_types.h $(orc_hash_sh) FORCE
+	$(call if_changed,orc_hash)
+archprepare: $(orc_hash_h)
+endif
+
 ifdef CONFIG_DYNAMIC_FTRACE
 KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
 CC_FLAGS_FTRACE := -fpatchable-function-entry=2
@@ -81,6 +93,13 @@ KBUILD_AFLAGS_MODULE		+= -Wa,-mla-global-with-abs
 KBUILD_CFLAGS_MODULE		+= -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
 endif
 
+KBUILD_AFLAGS			+= $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
+KBUILD_CFLAGS			+= $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
+
+ifdef CONFIG_OBJTOOL
+KBUILD_CFLAGS			+= -fno-optimize-sibling-calls -fno-jump-tables -falign-functions=4 -falign-labels=4
+endif
+
 ifeq ($(CONFIG_RELOCATABLE),y)
 KBUILD_CFLAGS_KERNEL		+= -fPIE
 LDFLAGS_vmlinux			+= -static -pie --no-dynamic-linker -z notext
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..a97c0edbb866 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+generated-y += orc_hash.h
+
 generic-y += dma-contiguous.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h
index d4ca3ba25418..08388876ade4 100644
--- a/arch/loongarch/include/asm/bug.h
+++ b/arch/loongarch/include/asm/bug.h
@@ -44,6 +44,7 @@
 do {								\
 	instrumentation_begin();				\
 	__BUG_FLAGS(BUGFLAG_WARNING|(flags));			\
+	annotate_reachable();					\
 	instrumentation_end();					\
 } while (0)
 
diff --git a/arch/loongarch/include/asm/exception.h b/arch/loongarch/include/asm/exception.h
index af74a3fdcad1..c6d20736fd92 100644
--- a/arch/loongarch/include/asm/exception.h
+++ b/arch/loongarch/include/asm/exception.h
@@ -6,6 +6,8 @@
 #include <asm/ptrace.h>
 #include <linux/kprobes.h>
 
+extern void *exception_table[];
+
 void show_registers(struct pt_regs *regs);
 
 asmlinkage void cache_parity_error(void);
diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
index 2ecd82bb64e1..f33f3fd32ecc 100644
--- a/arch/loongarch/include/asm/module.h
+++ b/arch/loongarch/include/asm/module.h
@@ -6,6 +6,7 @@
 #define _ASM_MODULE_H
 
 #include <asm/inst.h>
+#include <asm/orc_types.h>
 #include <asm-generic/module.h>
 
 #define RELA_STACK_DEPTH 16
@@ -21,6 +22,12 @@ struct mod_arch_specific {
 	struct mod_section plt;
 	struct mod_section plt_idx;
 
+#ifdef CONFIG_UNWINDER_ORC
+	unsigned int num_orcs;
+	int *orc_unwind_ip;
+	struct orc_entry *orc_unwind;
+#endif
+
 	/* For CONFIG_DYNAMIC_FTRACE */
 	struct plt_entry *ftrace_trampolines;
 };
diff --git a/arch/loongarch/include/asm/orc_header.h b/arch/loongarch/include/asm/orc_header.h
new file mode 100644
index 000000000000..f9d509c3fd70
--- /dev/null
+++ b/arch/loongarch/include/asm/orc_header.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _ORC_HEADER_H
+#define _ORC_HEADER_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/orc_hash.h>
+
+/*
+ * The header is currently a 20-byte hash of the ORC entry definition; see
+ * scripts/orc_hash.sh.
+ */
+#define ORC_HEADER					\
+	__used __section(".orc_header") __aligned(4)	\
+	static const u8 orc_header[] = { ORC_HASH }
+
+#endif /* _ORC_HEADER_H */
diff --git a/arch/loongarch/include/asm/orc_lookup.h b/arch/loongarch/include/asm/orc_lookup.h
new file mode 100644
index 000000000000..b02e6357def4
--- /dev/null
+++ b/arch/loongarch/include/asm/orc_lookup.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ORC_LOOKUP_H
+#define _ORC_LOOKUP_H
+
+/*
+ * This is a lookup table for speeding up access to the .orc_unwind table.
+ * Given an input address offset, the corresponding lookup table entry
+ * specifies a subset of the .orc_unwind table to search.
+ *
+ * Each block represents the end of the previous range and the start of the
+ * next range.  An extra block is added to give the last range an end.
+ *
+ * The block size should be a power of 2 to avoid a costly 'div' instruction.
+ *
+ * A block size of 256 was chosen because it roughly doubles unwinder
+ * performance while only adding ~5% to the ORC data footprint.
+ */
+#define LOOKUP_BLOCK_ORDER	8
+#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
+
+#ifndef LINKER_SCRIPT
+
+extern unsigned int orc_lookup[];
+extern unsigned int orc_lookup_end[];
+
+#define LOOKUP_START_IP		(unsigned long)_stext
+#define LOOKUP_STOP_IP		(unsigned long)_etext
+
+#endif /* LINKER_SCRIPT */
+
+#endif /* _ORC_LOOKUP_H */
diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h
new file mode 100644
index 000000000000..caf1f71a1057
--- /dev/null
+++ b/arch/loongarch/include/asm/orc_types.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and FP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED		0
+#define ORC_REG_PREV_SP			1
+#define ORC_REG_SP			2
+#define ORC_REG_FP			3
+#define ORC_REG_MAX			4
+
+#define ORC_TYPE_UNDEFINED		0
+#define ORC_TYPE_END_OF_STACK		1
+#define ORC_TYPE_CALL			2
+#define ORC_TYPE_REGS			3
+#define ORC_TYPE_REGS_PARTIAL		4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard.  It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
+ * unwinder how to find the previous SP and FP (and sometimes entry regs) on
+ * the stack for a given code address.  Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+	s16		sp_offset;
+	s16		fp_offset;
+	s16		ra_offset;
+	unsigned int	sp_reg:4;
+	unsigned int	fp_reg:4;
+	unsigned int	ra_reg:4;
+	unsigned int	type:3;
+	unsigned int	signal:1;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 4fb1e6408b98..45b507a7b06f 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -13,6 +13,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/loongarch.h>
 #include <asm/thread_info.h>
+#include <asm/unwind_hints.h>
 
 /* Make the addition of cfi info a little easier. */
 	.macro cfi_rel_offset reg offset=0 docfi=0
@@ -162,6 +163,7 @@
 	li.w	t0, CSR_CRMD_WE
 	csrxchg	t0, t0, LOONGARCH_CSR_CRMD
 #endif
+	UNWIND_HINT_REGS
 	.endm
 
 	.macro	SAVE_ALL docfi=0
@@ -219,6 +221,7 @@
 
 	.macro	RESTORE_SP_AND_RET docfi=0
 	cfi_ld	sp, PT_R3, \docfi
+	UNWIND_HINT_FUNC
 	ertn
 	.endm
 
diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
index b9dce87afd2e..c7f52d406cce 100644
--- a/arch/loongarch/include/asm/unwind.h
+++ b/arch/loongarch/include/asm/unwind.h
@@ -16,6 +16,7 @@
 enum unwinder_type {
 	UNWINDER_GUESS,
 	UNWINDER_PROLOGUE,
+	UNWINDER_ORC,
 };
 
 struct unwind_state {
@@ -24,7 +25,7 @@ struct unwind_state {
 	struct task_struct *task;
 	bool first, error, reset;
 	int graph_idx;
-	unsigned long sp, pc, ra;
+	unsigned long sp, fp, pc, ra;
 };
 
 bool default_next_frame(struct unwind_state *state);
@@ -34,6 +35,14 @@ void unwind_start(struct unwind_state *state,
 bool unwind_next_frame(struct unwind_state *state);
 unsigned long unwind_get_return_address(struct unwind_state *state);
 
+#ifdef CONFIG_UNWINDER_ORC
+void unwind_init(void);
+void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size);
+#else
+static inline void unwind_init(void) {}
+static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {}
+#endif
+
 static inline bool unwind_done(struct unwind_state *state)
 {
 	return state->stack_info.type == STACK_TYPE_UNKNOWN;
@@ -61,14 +70,17 @@ static __always_inline void __unwind_start(struct unwind_state *state,
 		state->sp = regs->regs[3];
 		state->pc = regs->csr_era;
 		state->ra = regs->regs[1];
+		state->fp = regs->regs[22];
 	} else if (task && task != current) {
 		state->sp = thread_saved_fp(task);
 		state->pc = thread_saved_ra(task);
 		state->ra = 0;
+		state->fp = 0;
 	} else {
 		state->sp = (unsigned long)__builtin_frame_address(0);
 		state->pc = (unsigned long)__builtin_return_address(0);
 		state->ra = 0;
+		state->fp = 0;
 	}
 	state->task = task;
 	get_stack_info(state->sp, state->task, &state->stack_info);
@@ -77,6 +89,9 @@ static __always_inline void __unwind_start(struct unwind_state *state,
 
 static __always_inline unsigned long __unwind_get_return_address(struct unwind_state *state)
 {
-	return unwind_done(state) ? 0 : state->pc;
+	if (unwind_done(state))
+		return 0;
+
+	return __kernel_text_address(state->pc) ? state->pc : 0;
 }
 #endif /* _ASM_UNWIND_H */
diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h
new file mode 100644
index 000000000000..82443fed3167
--- /dev/null
+++ b/arch/loongarch/include/asm/unwind_hints.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_UNWIND_HINTS_H
+#define _ASM_LOONGARCH_UNWIND_HINTS_H
+
+#include <linux/objtool.h>
+#include <asm/orc_types.h>
+
+#ifdef __ASSEMBLY__
+
+.macro UNWIND_HINT_UNDEFINED
+	UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED
+.endm
+
+.macro UNWIND_HINT_EMPTY
+	UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL
+.endm
+
+.macro UNWIND_HINT_REGS
+	UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_REGS
+.endm
+
+.macro UNWIND_HINT_FUNC
+	UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 4fcc168f0732..ac47e11c227e 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -3,6 +3,8 @@
 # Makefile for the Linux/LoongArch kernel.
 #
 
+OBJECT_FILES_NON_STANDARD_head.o := y
+
 extra-y		:= vmlinux.lds
 
 obj-y		+= head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
@@ -21,6 +23,7 @@ obj-$(CONFIG_ARCH_STRICT_ALIGN)	+= unaligned.o
 
 CFLAGS_module.o		+= $(call cc-option,-Wno-override-init,)
 CFLAGS_syscall.o	+= $(call cc-option,-Wno-override-init,)
+CFLAGS_traps.o		+= $(call cc-option,-Wno-override-init,)
 CFLAGS_perf_event.o	+= $(call cc-option,-Wno-override-init,)
 
 ifdef CONFIG_FUNCTION_TRACER
@@ -62,6 +65,7 @@ obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 
 obj-$(CONFIG_UNWINDER_GUESS)	+= unwind_guess.o
 obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
+obj-$(CONFIG_UNWINDER_ORC)	+= unwind_orc.o
 
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_regs.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index 1ec8e4c4cc2b..2f5c74d26d5f 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -14,11 +14,12 @@
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
 #include <asm/thread_info.h>
+#include <asm/unwind_hints.h>
 
 	.text
 	.cfi_sections	.debug_frame
-	.align	5
 SYM_CODE_START(handle_syscall)
+	UNWIND_HINT_UNDEFINED
 	csrrd		t0, PERCPU_BASE_KS
 	la.pcrel	t1, kernelsp
 	add.d		t1, t1, t0
@@ -57,6 +58,7 @@ SYM_CODE_START(handle_syscall)
 	cfi_st		fp, PT_R22
 
 	SAVE_STATIC
+	UNWIND_HINT_REGS
 
 #ifdef CONFIG_KGDB
 	li.w		t1, CSR_CRMD_WE
@@ -75,6 +77,7 @@ SYM_CODE_END(handle_syscall)
 _ASM_NOKPROBE(handle_syscall)
 
 SYM_CODE_START(ret_from_fork)
+	UNWIND_HINT_REGS
 	bl		schedule_tail		# a0 = struct task_struct *prev
 	move		a0, sp
 	bl 		syscall_exit_to_user_mode
@@ -84,6 +87,7 @@ SYM_CODE_START(ret_from_fork)
 SYM_CODE_END(ret_from_fork)
 
 SYM_CODE_START(ret_from_kernel_thread)
+	UNWIND_HINT_REGS
 	bl		schedule_tail		# a0 = struct task_struct *prev
 	move		a0, s1
 	jirl		ra, s0, 0
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index d53ab10f4644..487933899c5d 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -15,6 +15,7 @@
 #include <asm/fpregdef.h>
 #include <asm/loongarch.h>
 #include <asm/regdef.h>
+#include <asm/unwind_hints.h>
 
 #define FPU_REG_WIDTH		8
 #define LSX_REG_WIDTH		16
@@ -524,3 +525,9 @@ SYM_FUNC_END(_restore_lasx_context)
 .L_fpu_fault:
 	li.w	a0, -EFAULT				# failure
 	jr	ra
+
+#ifdef CONFIG_CPU_HAS_LBT
+STACK_FRAME_NON_STANDARD _restore_fp
+STACK_FRAME_NON_STANDARD _restore_lsx
+STACK_FRAME_NON_STANDARD _restore_lasx
+#endif
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 2bb3aa2dcfcb..6b7ec9f1134d 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -16,7 +16,6 @@
 #include <asm/stackframe.h>
 #include <asm/thread_info.h>
 
-	.align	5
 SYM_FUNC_START(__arch_cpu_idle)
 	/* start of rollback region */
 	LONG_L	t0, tp, TI_FLAGS
@@ -32,6 +31,7 @@ SYM_FUNC_START(__arch_cpu_idle)
 SYM_FUNC_END(__arch_cpu_idle)
 
 SYM_CODE_START(handle_vint)
+	UNWIND_HINT_UNDEFINED
 	BACKUP_T0T1
 	SAVE_ALL
 	la_abs	t1, __arch_cpu_idle
@@ -49,6 +49,7 @@ SYM_CODE_START(handle_vint)
 SYM_CODE_END(handle_vint)
 
 SYM_CODE_START(except_vec_cex)
+	UNWIND_HINT_UNDEFINED
 	b	cache_parity_error
 SYM_CODE_END(except_vec_cex)
 
@@ -65,8 +66,8 @@ SYM_CODE_END(except_vec_cex)
 	.endm
 
 	.macro	BUILD_HANDLER exception handler prep
-	.align	5
 	SYM_CODE_START(handle_\exception)
+	UNWIND_HINT_UNDEFINED
 	666:
 	BACKUP_T0T1
 	SAVE_ALL
@@ -94,6 +95,7 @@ SYM_CODE_END(except_vec_cex)
 	BUILD_HANDLER reserved reserved none	/* others */
 
 SYM_CODE_START(handle_sys)
+	UNWIND_HINT_UNDEFINED
 	la_abs	t0, handle_syscall
 	jr	t0
 SYM_CODE_END(handle_sys)
diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
index 9c75120a26d8..4d6914f84c7e 100644
--- a/arch/loongarch/kernel/lbt.S
+++ b/arch/loongarch/kernel/lbt.S
@@ -11,6 +11,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/errno.h>
 #include <asm/regdef.h>
+#include <asm/unwind_hints.h>
 
 #define SCR_REG_WIDTH 8
 
@@ -153,3 +154,7 @@ SYM_FUNC_END(_restore_ftop_context)
 .L_lbt_fault:
 	li.w		a0, -EFAULT		# failure
 	jr		ra
+
+#ifdef CONFIG_CPU_HAS_LBT
+STACK_FRAME_NON_STANDARD _restore_ftop_context
+#endif
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index b13b2858fe39..c7d0338d12c1 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -20,6 +20,7 @@
 #include <linux/kernel.h>
 #include <asm/alternative.h>
 #include <asm/inst.h>
+#include <asm/unwind.h>
 
 static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top)
 {
@@ -515,15 +516,28 @@ static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs, struct module *mod)
 {
-	const Elf_Shdr *s, *se;
 	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+	const Elf_Shdr *s, *alt = NULL, *orc = NULL, *orc_ip = NULL, *ftrace = NULL;
 
-	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
 		if (!strcmp(".altinstructions", secstrs + s->sh_name))
-			apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size);
+			alt = s;
+		if (!strcmp(".orc_unwind", secstrs + s->sh_name))
+			orc = s;
+		if (!strcmp(".orc_unwind_ip", secstrs + s->sh_name))
+			orc_ip = s;
 		if (!strcmp(".ftrace_trampoline", secstrs + s->sh_name))
-			module_init_ftrace_plt(hdr, s, mod);
+			ftrace = s;
 	}
 
+	if (alt)
+		apply_alternatives((void *)alt->sh_addr, (void *)alt->sh_addr + alt->sh_size);
+
+	if (orc && orc_ip)
+		unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, (void *)orc->sh_addr, orc->sh_size);
+
+	if (ftrace)
+		module_init_ftrace_plt(hdr, ftrace, mod);
+
 	return 0;
 }
diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
index f49f6b053763..bcc191d278c1 100644
--- a/arch/loongarch/kernel/relocate_kernel.S
+++ b/arch/loongarch/kernel/relocate_kernel.S
@@ -15,6 +15,7 @@
 #include <asm/addrspace.h>
 
 SYM_CODE_START(relocate_new_kernel)
+	UNWIND_HINT_UNDEFINED
 	/*
 	 * a0: EFI boot flag for the new kernel
 	 * a1: Command line pointer for the new kernel
@@ -90,6 +91,7 @@ SYM_CODE_END(relocate_new_kernel)
  * then start at the entry point from LOONGARCH_IOCSR_MBUF0.
  */
 SYM_CODE_START(kexec_smp_wait)
+	UNWIND_HINT_UNDEFINED
 1:	li.w		t0, 0x100			/* wait for init loop */
 2:	addi.w		t0, t0, -1			/* limit mailbox access */
 	bnez		t0, 2b
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index d183a745fb85..ec4459c61db6 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -47,6 +47,7 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/time.h>
+#include <asm/unwind.h>
 
 #define SMBIOS_BIOSSIZE_OFFSET		0x09
 #define SMBIOS_BIOSEXTERN_OFFSET	0x13
@@ -605,6 +606,7 @@ static void __init prefill_possible_map(void)
 void __init setup_arch(char **cmdline_p)
 {
 	cpu_probe();
+	unwind_init();
 
 	init_environ();
 	efi_init();
diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
index 92270f14db94..9848d427cbfa 100644
--- a/arch/loongarch/kernel/stacktrace.c
+++ b/arch/loongarch/kernel/stacktrace.c
@@ -29,6 +29,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 			regs->csr_era = thread_saved_ra(task);
 		}
 		regs->regs[1] = 0;
+		regs->regs[22] = 0;
 	}
 
 	for (unwind_start(&state, task, regs);
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index aebfc3733a76..f9f4eb00c92e 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -53,6 +53,32 @@
 
 #include "access-helper.h"
 
+void *exception_table[EXCCODE_INT_START] = {
+	[0 ... EXCCODE_INT_START - 1] = handle_reserved,
+
+	[EXCCODE_TLBI]		= handle_tlb_load,
+	[EXCCODE_TLBL]		= handle_tlb_load,
+	[EXCCODE_TLBS]		= handle_tlb_store,
+	[EXCCODE_TLBM]		= handle_tlb_modify,
+	[EXCCODE_TLBNR]		= handle_tlb_protect,
+	[EXCCODE_TLBNX]		= handle_tlb_protect,
+	[EXCCODE_TLBPE]		= handle_tlb_protect,
+	[EXCCODE_ADE]		= handle_ade,
+	[EXCCODE_ALE]		= handle_ale,
+	[EXCCODE_BCE]		= handle_bce,
+	[EXCCODE_SYS]		= handle_sys,
+	[EXCCODE_BP]		= handle_bp,
+	[EXCCODE_INE]		= handle_ri,
+	[EXCCODE_IPE]		= handle_ri,
+	[EXCCODE_FPDIS]		= handle_fpu,
+	[EXCCODE_LSXDIS]	= handle_lsx,
+	[EXCCODE_LASXDIS]	= handle_lasx,
+	[EXCCODE_FPE]		= handle_fpe,
+	[EXCCODE_WATCH]		= handle_watch,
+	[EXCCODE_BTDIS]		= handle_lbt,
+};
+EXPORT_SYMBOL_GPL(exception_table);
+
 static void show_backtrace(struct task_struct *task, const struct pt_regs *regs,
 			   const char *loglvl, bool user)
 {
@@ -1150,19 +1176,9 @@ void __init trap_init(void)
 	for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++)
 		set_handler(i * VECSIZE, handle_vint, VECSIZE);
 
-	set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE);
-	set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE);
-	set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE);
-	set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE);
-	set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE);
-	set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE);
-	set_handler(EXCCODE_IPE * VECSIZE, handle_ri, VECSIZE);
-	set_handler(EXCCODE_FPDIS * VECSIZE, handle_fpu, VECSIZE);
-	set_handler(EXCCODE_LSXDIS * VECSIZE, handle_lsx, VECSIZE);
-	set_handler(EXCCODE_LASXDIS * VECSIZE, handle_lasx, VECSIZE);
-	set_handler(EXCCODE_FPE * VECSIZE, handle_fpe, VECSIZE);
-	set_handler(EXCCODE_BTDIS * VECSIZE, handle_lbt, VECSIZE);
-	set_handler(EXCCODE_WATCH * VECSIZE, handle_watch, VECSIZE);
+	/* Set exception vector handler */
+	for (i = EXCCODE_ADE; i <= EXCCODE_BTDIS; i++)
+		set_handler(i * VECSIZE, exception_table[i], VECSIZE);
 
 	cache_error_setup();
 
diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c
new file mode 100644
index 000000000000..15f18d1e0dad
--- /dev/null
+++ b/arch/loongarch/kernel/unwind_orc.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/objtool.h>
+#include <linux/module.h>
+#include <linux/sort.h>
+#include <asm/exception.h>
+#include <asm/orc_header.h>
+#include <asm/orc_lookup.h>
+#include <asm/orc_types.h>
+#include <asm/ptrace.h>
+#include <asm/setup.h>
+#include <asm/stacktrace.h>
+#include <asm/tlb.h>
+#include <asm/unwind.h>
+
+ORC_HEADER;
+
+#define orc_warn(fmt, ...) \
+	printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+
+extern int __start_orc_unwind_ip[];
+extern int __stop_orc_unwind_ip[];
+extern struct orc_entry __start_orc_unwind[];
+extern struct orc_entry __stop_orc_unwind[];
+
+static bool orc_init __ro_after_init;
+static unsigned int lookup_num_blocks __ro_after_init;
+
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+	.sp_reg		= ORC_REG_FP,
+	.sp_offset	= 16,
+	.fp_reg		= ORC_REG_PREV_SP,
+	.fp_offset	= -16,
+	.ra_reg		= ORC_REG_PREV_SP,
+	.ra_offset	= -8,
+	.type		= ORC_TYPE_CALL
+};
+
+/*
+ * If we crash with IP==0, the last successfully executed instruction
+ * was probably an indirect function call with a NULL function pointer,
+ * and we don't have unwind information for NULL.
+ * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function
+ * pointer into its parent and then continue normally from there.
+ */
+static struct orc_entry orc_null_entry = {
+	.sp_reg		= ORC_REG_SP,
+	.sp_offset	= sizeof(long),
+	.fp_reg		= ORC_REG_UNDEFINED,
+	.type		= ORC_TYPE_CALL
+};
+
+static inline unsigned long orc_ip(const int *ip)
+{
+	return (unsigned long)ip + *ip;
+}
+
+static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
+				    unsigned int num_entries, unsigned long ip)
+{
+	int *first = ip_table;
+	int *mid = first, *found = first;
+	int *last = ip_table + num_entries - 1;
+
+	if (!num_entries)
+		return NULL;
+
+	/*
+	 * Do a binary range search to find the rightmost duplicate of a given
+	 * starting address.  Some entries are section terminators which are
+	 * "weak" entries for ensuring there are no gaps.  They should be
+	 * ignored when they conflict with a real entry.
+	 */
+	while (first <= last) {
+		mid = first + ((last - first) / 2);
+
+		if (orc_ip(mid) <= ip) {
+			found = mid;
+			first = mid + 1;
+		} else
+			last = mid - 1;
+	}
+
+	return u_table + (found - ip_table);
+}
+
+#ifdef CONFIG_MODULES
+static struct orc_entry *orc_module_find(unsigned long ip)
+{
+	struct module *mod;
+
+	mod = __module_address(ip);
+	if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip)
+		return NULL;
+
+	return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind, mod->arch.num_orcs, ip);
+}
+#else
+static struct orc_entry *orc_module_find(unsigned long ip)
+{
+	return NULL;
+}
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static struct orc_entry *orc_find(unsigned long ip);
+
+/*
+ * Ftrace dynamic trampolines do not have orc entries of their own.
+ * But they are copies of the ftrace entries that are static and
+ * defined in ftrace_*.S, which do have orc entries.
+ *
+ * If the unwinder comes across a ftrace trampoline, then find the
+ * ftrace function that was used to create it, and use that ftrace
+ * function's orc entry, as the placement of the return code in
+ * the stack will be identical.
+ */
+static struct orc_entry *orc_ftrace_find(unsigned long ip)
+{
+	struct ftrace_ops *ops;
+	unsigned long tramp_addr, offset;
+
+	ops = ftrace_ops_trampoline(ip);
+	if (!ops)
+		return NULL;
+
+	/* Set tramp_addr to the start of the code copied by the trampoline */
+	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+		tramp_addr = (unsigned long)ftrace_regs_caller;
+	else
+		tramp_addr = (unsigned long)ftrace_caller;
+
+	/* Now place tramp_addr to the location within the trampoline ip is at */
+	offset = ip - ops->trampoline;
+	tramp_addr += offset;
+
+	/* Prevent unlikely recursion */
+	if (ip == tramp_addr)
+		return NULL;
+
+	return orc_find(tramp_addr);
+}
+#else
+static struct orc_entry *orc_ftrace_find(unsigned long ip)
+{
+	return NULL;
+}
+#endif
+
+static struct orc_entry *orc_find(unsigned long ip)
+{
+	static struct orc_entry *orc;
+
+	if (ip == 0)
+		return &orc_null_entry;
+
+	/* For non-init vmlinux addresses, use the fast lookup table: */
+	if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
+		unsigned int idx, start, stop;
+
+		idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
+
+		if (unlikely((idx >= lookup_num_blocks-1))) {
+			orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
+				 idx, lookup_num_blocks, (void *)ip);
+			return NULL;
+		}
+
+		start = orc_lookup[idx];
+		stop = orc_lookup[idx + 1] + 1;
+
+		if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
+			     (__start_orc_unwind + stop > __stop_orc_unwind))) {
+			orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
+				 idx, lookup_num_blocks, start, stop, (void *)ip);
+			return NULL;
+		}
+
+		return __orc_find(__start_orc_unwind_ip + start,
+				  __start_orc_unwind + start, stop - start, ip);
+	}
+
+	/* vmlinux .init slow lookup: */
+	if (is_kernel_inittext(ip))
+		return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
+				  __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
+
+	/* Module lookup: */
+	orc = orc_module_find(ip);
+	if (orc)
+		return orc;
+
+	return orc_ftrace_find(ip);
+}
+
+#ifdef CONFIG_MODULES
+
+static DEFINE_MUTEX(sort_mutex);
+static int *cur_orc_ip_table = __start_orc_unwind_ip;
+static struct orc_entry *cur_orc_table = __start_orc_unwind;
+
+static void orc_sort_swap(void *_a, void *_b, int size)
+{
+	int delta = _b - _a;
+	int *a = _a, *b = _b, tmp;
+	struct orc_entry *orc_a, *orc_b;
+
+	/* Swap the .orc_unwind_ip entries: */
+	tmp = *a;
+	*a = *b + delta;
+	*b = tmp - delta;
+
+	/* Swap the corresponding .orc_unwind entries: */
+	orc_a = cur_orc_table + (a - cur_orc_ip_table);
+	orc_b = cur_orc_table + (b - cur_orc_ip_table);
+	swap(*orc_a, *orc_b);
+}
+
+static int orc_sort_cmp(const void *_a, const void *_b)
+{
+	const int *a = _a, *b = _b;
+	unsigned long a_val = orc_ip(a);
+	unsigned long b_val = orc_ip(b);
+	struct orc_entry *orc_a;
+
+	if (a_val > b_val)
+		return 1;
+	if (a_val < b_val)
+		return -1;
+
+	/*
+	 * The "weak" section terminator entries need to always be first
+	 * to ensure the lookup code skips them in favor of real entries.
+	 * These terminator entries exist to handle any gaps created by
+	 * whitelisted .o files which didn't get objtool generation.
+	 */
+	orc_a = cur_orc_table + (a - cur_orc_ip_table);
+
+	return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1;
+}
+
+void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
+			void *_orc, size_t orc_size)
+{
+	int *orc_ip = _orc_ip;
+	struct orc_entry *orc = _orc;
+	unsigned int num_entries = orc_ip_size / sizeof(int);
+
+	WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 ||
+		     orc_size % sizeof(*orc) != 0 ||
+		     num_entries != orc_size / sizeof(*orc));
+
+	/*
+	 * The 'cur_orc_*' globals allow the orc_sort_swap() callback to
+	 * associate an .orc_unwind_ip table entry with its corresponding
+	 * .orc_unwind entry so they can both be swapped.
+	 */
+	mutex_lock(&sort_mutex);
+	cur_orc_ip_table = orc_ip;
+	cur_orc_table = orc;
+	sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap);
+	mutex_unlock(&sort_mutex);
+
+	mod->arch.orc_unwind_ip = orc_ip;
+	mod->arch.orc_unwind = orc;
+	mod->arch.num_orcs = num_entries;
+}
+#endif
+
+void __init unwind_init(void)
+{
+	int i;
+	size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
+	size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
+	size_t num_entries = orc_ip_size / sizeof(int);
+	struct orc_entry *orc;
+
+	if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
+	    orc_size % sizeof(struct orc_entry) != 0 ||
+	    num_entries != orc_size / sizeof(struct orc_entry)) {
+		orc_warn("WARNING: Bad or missing .orc_unwind table.  Disabling unwinder.\n");
+		return;
+	}
+
+	/*
+	 * Note, the orc_unwind and orc_unwind_ip tables were already
+	 * sorted at build time via the 'sorttable' tool.
+	 * It's ready for binary search straight away, no need to sort it.
+	 */
+
+	/* Initialize the fast lookup table: */
+	lookup_num_blocks = orc_lookup_end - orc_lookup;
+	for (i = 0; i < lookup_num_blocks-1; i++) {
+		orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
+				 num_entries, LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
+		if (!orc) {
+			orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
+			return;
+		}
+
+		orc_lookup[i] = orc - __start_orc_unwind;
+	}
+
+	/* Initialize the ending block: */
+	orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, LOOKUP_STOP_IP);
+	if (!orc) {
+		orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
+		return;
+	}
+	orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
+
+	orc_init = true;
+}
+
+static inline bool on_stack(struct stack_info *info, unsigned long addr, size_t len)
+{
+	unsigned long begin = info->begin;
+	unsigned long end   = info->end;
+
+	return (info->type != STACK_TYPE_UNKNOWN &&
+		addr >= begin && addr < end && addr + len > begin && addr + len <= end);
+}
+
+static bool stack_access_ok(struct unwind_state *state, unsigned long addr, size_t len)
+{
+	struct stack_info *info = &state->stack_info;
+
+	if (on_stack(info, addr, len))
+		return true;
+
+	return !get_stack_info(addr, state->task, info) && on_stack(info, addr, len);
+}
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	return __unwind_get_return_address(state);
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+void unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs)
+{
+	__unwind_start(state, task, regs);
+	state->type = UNWINDER_ORC;
+	if (!unwind_done(state) && !__kernel_text_address(state->pc))
+		unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(unwind_start);
+
+static bool is_entry_func(unsigned long addr)
+{
+	extern u32 kernel_entry;
+	extern u32 kernel_entry_end;
+
+	return addr >= (unsigned long)&kernel_entry && addr < (unsigned long)&kernel_entry_end;
+}
+
+static inline unsigned long bt_address(unsigned long ra)
+{
+	extern unsigned long eentry;
+
+	if (__kernel_text_address(ra))
+		return ra;
+
+	if (__module_text_address(ra))
+		return ra;
+
+	if (ra >= eentry && ra < eentry +  EXCCODE_INT_END * VECSIZE) {
+		unsigned long func;
+		unsigned long type = (ra - eentry) / VECSIZE;
+		unsigned long offset = (ra - eentry) % VECSIZE;
+
+		switch (type) {
+		case 0 ... EXCCODE_INT_START - 1:
+			func = (unsigned long)exception_table[type];
+			break;
+		case EXCCODE_INT_START ... EXCCODE_INT_END:
+			func = (unsigned long)handle_vint;
+			break;
+		default:
+			func = (unsigned long)handle_reserved;
+			break;
+		}
+
+		return func + offset;
+	}
+
+	return ra;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	unsigned long *p, pc;
+	struct pt_regs *regs;
+	struct orc_entry *orc;
+	struct stack_info *info = &state->stack_info;
+
+	if (unwind_done(state))
+		return false;
+
+	/* Don't let modules unload while we're reading their ORC data. */
+	preempt_disable();
+
+	if (is_entry_func(state->pc))
+		goto end;
+
+	orc = orc_find(state->pc);
+	if (!orc) {
+		orc = &orc_fp_entry;
+		state->error = true;
+	}
+
+	switch (orc->sp_reg) {
+	case ORC_REG_SP:
+		if (info->type == STACK_TYPE_IRQ && state->sp == info->end)
+			orc->type = ORC_TYPE_REGS;
+		else
+			state->sp = state->sp + orc->sp_offset;
+		break;
+	case ORC_REG_FP:
+		state->sp = state->fp;
+		break;
+	default:
+		orc_warn("unknown SP base reg %d at %pB\n", orc->sp_reg, (void *)state->pc);
+		goto err;
+	}
+
+	switch (orc->fp_reg) {
+	case ORC_REG_PREV_SP:
+		p = (unsigned long *)(state->sp + orc->fp_offset);
+		if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long)))
+			goto err;
+
+		state->fp = *p;
+		break;
+	case ORC_REG_UNDEFINED:
+		/* Nothing. */
+		break;
+	default:
+		orc_warn("unknown FP base reg %d at %pB\n", orc->fp_reg, (void *)state->pc);
+		goto err;
+	}
+
+	switch (orc->type) {
+	case ORC_TYPE_CALL:
+		if (orc->ra_reg == ORC_REG_PREV_SP) {
+			p = (unsigned long *)(state->sp + orc->ra_offset);
+			if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long)))
+				goto err;
+
+			pc = unwind_graph_addr(state, *p, state->sp);
+			pc -= LOONGARCH_INSN_SIZE;
+		} else if (orc->ra_reg == ORC_REG_UNDEFINED) {
+			if (!state->ra || state->ra == state->pc)
+				goto err;
+
+			pc = unwind_graph_addr(state, state->ra, state->sp);
+			pc -=  LOONGARCH_INSN_SIZE;
+			state->ra = 0;
+		} else {
+			orc_warn("unknown ra base reg %d at %pB\n", orc->ra_reg, (void *)state->pc);
+			goto err;
+		}
+		break;
+	case ORC_TYPE_REGS:
+		if (info->type == STACK_TYPE_IRQ && state->sp == info->end)
+			regs = (struct pt_regs *)info->next_sp;
+		else
+			regs = (struct pt_regs *)state->sp;
+
+		if (!stack_access_ok(state, (unsigned long)regs, sizeof(*regs)))
+			goto err;
+
+		if ((info->end == (unsigned long)regs + sizeof(*regs)) &&
+		    !regs->regs[3] && !regs->regs[1])
+			goto end;
+
+		if (user_mode(regs))
+			goto end;
+
+		pc = regs->csr_era;
+		if (!__kernel_text_address(pc))
+			goto err;
+
+		state->sp = regs->regs[3];
+		state->ra = regs->regs[1];
+		state->fp = regs->regs[22];
+		get_stack_info(state->sp, state->task, info);
+
+		break;
+	default:
+		orc_warn("unknown .orc_unwind entry type %d at %pB\n", orc->type, (void *)state->pc);
+		goto err;
+	}
+
+	state->pc = bt_address(pc);
+	if (!state->pc) {
+		pr_err("cannot find unwind pc at %pK\n", (void *)pc);
+		goto err;
+	}
+
+	if (!__kernel_text_address(state->pc))
+		goto err;
+
+	preempt_enable();
+	return true;
+
+err:
+	state->error = true;
+
+end:
+	preempt_enable();
+	state->stack_info.type = STACK_TYPE_UNKNOWN;
+	return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index bb2ec86f37a8..eaa7a91162e6 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -2,6 +2,7 @@
 #include <linux/sizes.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
+#include <asm/orc_lookup.h>
 
 #define PAGE_SIZE _PAGE_SIZE
 #define RO_EXCEPTION_TABLE_ALIGN	4
@@ -122,6 +123,8 @@ SECTIONS
 	}
 #endif
 
+	ORC_UNWIND_TABLE
+
 	.sdata : {
 		*(.sdata)
 	}
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
index 0ed9040307b7..9d49c3f6fff5 100644
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -8,7 +8,7 @@
 #include <asm/asmmacro.h>
 #include <asm/loongarch.h>
 #include <asm/regdef.h>
-#include <asm/stackframe.h>
+#include <asm/unwind_hints.h>
 
 #define HGPR_OFFSET(x)		(PT_R0 + 8*x)
 #define GGPR_OFFSET(x)		(KVM_ARCH_GGPR + 8*x)
@@ -112,6 +112,7 @@
 	.text
 	.cfi_sections	.debug_frame
 SYM_CODE_START(kvm_exc_entry)
+	UNWIND_HINT_UNDEFINED
 	csrwr	a2,   KVM_TEMP_KS
 	csrrd	a2,   KVM_VCPU_KS
 	addi.d	a2,   a2, KVM_VCPU_ARCH
@@ -248,3 +249,7 @@ SYM_FUNC_END(kvm_restore_fpu)
 	.section ".rodata"
 SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry)
 SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
+
+#ifdef CONFIG_CPU_HAS_LBT
+STACK_FRAME_NON_STANDARD kvm_restore_fpu
+#endif
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index a77bf160bfc4..e3023d9a508c 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -3,6 +3,8 @@
 # Makefile for LoongArch-specific library files.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 lib-y	+= delay.o memset.o memcpy.o memmove.o \
 	   clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
 
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index 2c0a411f23aa..f01172a8f4e9 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -9,8 +9,9 @@
 #include <linux/hugetlb.h>
 #include <linux/export.h>
 
-#include <asm/cpu.h>
 #include <asm/bootinfo.h>
+#include <asm/cpu.h>
+#include <asm/exception.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
@@ -266,24 +267,20 @@ static void setup_tlb_handler(int cpu)
 	setup_ptwalker();
 	local_flush_tlb_all();
 
+	if (cpu_has_ptw) {
+		exception_table[EXCCODE_TLBI] = handle_tlb_load_ptw;
+		exception_table[EXCCODE_TLBL] = handle_tlb_load_ptw;
+		exception_table[EXCCODE_TLBS] = handle_tlb_store_ptw;
+		exception_table[EXCCODE_TLBM] = handle_tlb_modify_ptw;
+	}
+
 	/* The tlb handlers are generated only once */
 	if (cpu == 0) {
 		memcpy((void *)tlbrentry, handle_tlb_refill, 0x80);
 		local_flush_icache_range(tlbrentry, tlbrentry + 0x80);
-		if (!cpu_has_ptw) {
-			set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE);
-			set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE);
-			set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE);
-			set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE);
-		} else {
-			set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE);
-			set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE);
-			set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE);
-			set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE);
-		}
-		set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
-		set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
-		set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
+
+		for (int i = EXCCODE_TLBL; i <= EXCCODE_TLBPE; i++)
+			set_handler(i * VECSIZE, exception_table[i], VECSIZE);
 	}
 #ifdef CONFIG_NUMA
 	else {
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index d5d682f3d29f..a44387b838af 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -18,6 +18,7 @@
 
 	.macro tlb_do_page_fault, write
 	SYM_CODE_START(tlb_do_page_fault_\write)
+	UNWIND_HINT_UNDEFINED
 	SAVE_ALL
 	csrrd		a2, LOONGARCH_CSR_BADV
 	move		a0, sp
@@ -32,6 +33,7 @@
 	tlb_do_page_fault 1
 
 SYM_CODE_START(handle_tlb_protect)
+	UNWIND_HINT_UNDEFINED
 	BACKUP_T0T1
 	SAVE_ALL
 	move		a0, sp
@@ -44,6 +46,7 @@ SYM_CODE_START(handle_tlb_protect)
 SYM_CODE_END(handle_tlb_protect)
 
 SYM_CODE_START(handle_tlb_load)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, EXCEPTION_KS0
 	csrwr		t1, EXCEPTION_KS1
 	csrwr		ra, EXCEPTION_KS2
@@ -190,6 +193,7 @@ nopage_tlb_load:
 SYM_CODE_END(handle_tlb_load)
 
 SYM_CODE_START(handle_tlb_load_ptw)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, LOONGARCH_CSR_KS0
 	csrwr		t1, LOONGARCH_CSR_KS1
 	la_abs		t0, tlb_do_page_fault_0
@@ -197,6 +201,7 @@ SYM_CODE_START(handle_tlb_load_ptw)
 SYM_CODE_END(handle_tlb_load_ptw)
 
 SYM_CODE_START(handle_tlb_store)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, EXCEPTION_KS0
 	csrwr		t1, EXCEPTION_KS1
 	csrwr		ra, EXCEPTION_KS2
@@ -346,6 +351,7 @@ nopage_tlb_store:
 SYM_CODE_END(handle_tlb_store)
 
 SYM_CODE_START(handle_tlb_store_ptw)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, LOONGARCH_CSR_KS0
 	csrwr		t1, LOONGARCH_CSR_KS1
 	la_abs		t0, tlb_do_page_fault_1
@@ -353,6 +359,7 @@ SYM_CODE_START(handle_tlb_store_ptw)
 SYM_CODE_END(handle_tlb_store_ptw)
 
 SYM_CODE_START(handle_tlb_modify)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, EXCEPTION_KS0
 	csrwr		t1, EXCEPTION_KS1
 	csrwr		ra, EXCEPTION_KS2
@@ -500,6 +507,7 @@ nopage_tlb_modify:
 SYM_CODE_END(handle_tlb_modify)
 
 SYM_CODE_START(handle_tlb_modify_ptw)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, LOONGARCH_CSR_KS0
 	csrwr		t1, LOONGARCH_CSR_KS1
 	la_abs		t0, tlb_do_page_fault_1
@@ -507,6 +515,7 @@ SYM_CODE_START(handle_tlb_modify_ptw)
 SYM_CODE_END(handle_tlb_modify_ptw)
 
 SYM_CODE_START(handle_tlb_refill)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, LOONGARCH_CSR_TLBRSAVE
 	csrrd		t0, LOONGARCH_CSR_PGD
 	lddir		t0, t0, 3
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index c74c9921304f..7a4ad96522ac 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -3,6 +3,7 @@
 
 KASAN_SANITIZE := n
 KCOV_INSTRUMENT := n
+OBJECT_FILES_NON_STANDARD := y
 
 # Include the generic Makefile to check the built vdso.
 include $(srctree)/lib/vdso/Makefile
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index bb1339c7057b..39f2d4a05208 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -116,6 +116,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
  */
 #define __stringify_label(n) #n
 
+#define __annotate_reachable(c) ({					\
+	asm volatile(__stringify_label(c) ":\n\t"			\
+			".pushsection .discard.reachable\n\t"		\
+			".long " __stringify_label(c) "b - .\n\t"	\
+			".popsection\n\t");				\
+})
+#define annotate_reachable() __annotate_reachable(__COUNTER__)
+
 #define __annotate_unreachable(c) ({					\
 	asm volatile(__stringify_label(c) ":\n\t"			\
 		     ".pushsection .discard.unreachable\n\t"		\
@@ -128,6 +136,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 #define __annotate_jump_table __section(".rodata..c_jump_table")
 
 #else /* !CONFIG_OBJTOOL */
+#define annotate_reachable()
 #define annotate_unreachable()
 #define __annotate_jump_table
 #endif /* CONFIG_OBJTOOL */
diff --git a/scripts/Makefile b/scripts/Makefile
index 576cf64be667..e4cca53d2285 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -31,9 +31,12 @@ HOSTLDLIBS_sign-file = $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null |
 
 ifdef CONFIG_UNWINDER_ORC
 ifeq ($(ARCH),x86_64)
-ARCH := x86
+SRCARCH := x86
 endif
-HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include
+ifeq ($(ARCH),loongarch)
+SRCARCH := loongarch
+endif
+HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/$(SRCARCH)/include
 HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED
 endif
 
-- 
2.42.0
Re: [PATCH v5 8/8] LoongArch: Add ORC stack unwinder support
Posted by Xi Ruoyao 2 years ago
On Wed, 2023-11-29 at 21:07 +0800, Tiezhu Yang wrote:
> +KBUILD_AFLAGS			+= $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
> +KBUILD_CFLAGS			+= $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
> +
> +ifdef CONFIG_OBJTOOL
> +KBUILD_CFLAGS			+= -fno-optimize-sibling-calls -fno-jump-tables -falign-functions=4 -falign-labels=4
> +endif

I still hate regressing the optimization with a passion.

And -falign-labels=4 implies -falign-functions=4 so at least we can
remove -falign-functions=4.

And in GCC >= 14 the defaults are -falign-labels=16 and -falign-
functions=32.  These values are determined by benchmarking on LA464 so
I'd suggest using them (maybe unless CONFIG_CC_OPTIMIZE_FOR_SIZE).

-- 
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
Re: [PATCH v5 8/8] LoongArch: Add ORC stack unwinder support
Posted by Tiezhu Yang 2 years ago

On 12/03/2023 09:20 PM, Xi Ruoyao wrote:
> On Wed, 2023-11-29 at 21:07 +0800, Tiezhu Yang wrote:
>> +KBUILD_AFLAGS			+= $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
>> +KBUILD_CFLAGS			+= $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
>> +
>> +ifdef CONFIG_OBJTOOL
>> +KBUILD_CFLAGS			+= -fno-optimize-sibling-calls -fno-jump-tables -falign-functions=4 -falign-labels=4
>> +endif
>
> I still hate regressing the optimization with a passion.

Yes, I agree in general.

>
> And -falign-labels=4 implies -falign-functions=4 so at least we can
> remove -falign-functions=4.

Thanks.

>
> And in GCC >= 14 the defaults are -falign-labels=16 and -falign-
> functions=32.  These values are determined by benchmarking on LA464 so
> I'd suggest using them (maybe unless CONFIG_CC_OPTIMIZE_FOR_SIZE).
>

The initial aim to add "-falign-functions=4 -falign-labels=4" is to
avoid generating nop instructions in .o file with -mrelax option
by default, otherwise the orc info can not match the symbol address
in vmlinux.

I want to add -mno-relax option and remove the extra option
"-falign-functions=4 -falign-labels=4", then the offsets in
each .o file and vmlinux are same, and it is not necessary
to do the special handling about local labels, I think
it will make life easier.

Thanks,
Tiezhu
Re: [PATCH v5 8/8] LoongArch: Add ORC stack unwinder support
Posted by Huacai Chen 2 years ago
On Wed, Nov 29, 2023 at 9:07 PM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>
> The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
> similar in concept to a DWARF unwinder. The difference is that the format
> of the ORC data is much simpler than DWARF, which in turn allows the ORC
> unwinder to be much simpler and faster.
>
> The ORC data consists of unwind tables which are generated by objtool.
> After analyzing all the code paths of a .o file, it determines information
> about the stack state at each instruction address in the file and outputs
> that information to the .orc_unwind and .orc_unwind_ip sections.
>
> The per-object ORC sections are combined at link time and are sorted and
> post-processed at boot time. The unwinder uses the resulting data to
> correlate instruction addresses with their stack states at run time.
>
> Most of the logic are similar with x86, in order to get ra info before ra
> is saved into stack, add ra_reg and ra_offset into orc_entry. At the same
> time, modify some arch-specific code to silence the objtool warnings.
>
> Co-developed-by: Jinyang He <hejinyang@loongson.cn>
> Signed-off-by: Jinyang He <hejinyang@loongson.cn>
> Co-developed-by: Youling Tang <tangyouling@loongson.cn>
> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
> ---
>  arch/loongarch/Kconfig                    |   2 +
>  arch/loongarch/Kconfig.debug              |  11 +
>  arch/loongarch/Makefile                   |  19 +
>  arch/loongarch/include/asm/Kbuild         |   2 +
>  arch/loongarch/include/asm/bug.h          |   1 +
>  arch/loongarch/include/asm/exception.h    |   2 +
>  arch/loongarch/include/asm/module.h       |   7 +
>  arch/loongarch/include/asm/orc_header.h   |  18 +
>  arch/loongarch/include/asm/orc_lookup.h   |  31 ++
>  arch/loongarch/include/asm/orc_types.h    |  58 +++
>  arch/loongarch/include/asm/stackframe.h   |   3 +
>  arch/loongarch/include/asm/unwind.h       |  19 +-
>  arch/loongarch/include/asm/unwind_hints.h |  28 ++
>  arch/loongarch/kernel/Makefile            |   4 +
>  arch/loongarch/kernel/entry.S             |   6 +-
>  arch/loongarch/kernel/fpu.S               |   7 +
>  arch/loongarch/kernel/genex.S             |   6 +-
>  arch/loongarch/kernel/lbt.S               |   5 +
>  arch/loongarch/kernel/module.c            |  22 +-
>  arch/loongarch/kernel/relocate_kernel.S   |   2 +
>  arch/loongarch/kernel/setup.c             |   2 +
>  arch/loongarch/kernel/stacktrace.c        |   1 +
>  arch/loongarch/kernel/traps.c             |  42 +-
>  arch/loongarch/kernel/unwind_orc.c        | 516 ++++++++++++++++++++++
>  arch/loongarch/kernel/vmlinux.lds.S       |   3 +
>  arch/loongarch/kvm/switch.S               |   7 +-
>  arch/loongarch/lib/Makefile               |   2 +
>  arch/loongarch/mm/tlb.c                   |  27 +-
>  arch/loongarch/mm/tlbex.S                 |   9 +
>  arch/loongarch/vdso/Makefile              |   1 +
>  include/linux/compiler.h                  |   9 +
>  scripts/Makefile                          |   7 +-
>  32 files changed, 839 insertions(+), 40 deletions(-)
>  create mode 100644 arch/loongarch/include/asm/orc_header.h
>  create mode 100644 arch/loongarch/include/asm/orc_lookup.h
>  create mode 100644 arch/loongarch/include/asm/orc_types.h
>  create mode 100644 arch/loongarch/include/asm/unwind_hints.h
>  create mode 100644 arch/loongarch/kernel/unwind_orc.c
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index ee123820a476..eea57378646d 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -132,6 +132,7 @@ config LOONGARCH
>         select HAVE_KVM
>         select HAVE_MOD_ARCH_SPECIFIC
>         select HAVE_NMI
> +       select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS
>         select HAVE_PCI
>         select HAVE_PERF_EVENTS
>         select HAVE_PERF_REGS
> @@ -143,6 +144,7 @@ config LOONGARCH
>         select HAVE_SAMPLE_FTRACE_DIRECT
>         select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
>         select HAVE_SETUP_PER_CPU_AREA if NUMA
> +       select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
>         select HAVE_STACKPROTECTOR
>         select HAVE_SYSCALL_TRACEPOINTS
>         select HAVE_TIF_NOHZ
> diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug
> index 8d36aab53008..98d60630c3d4 100644
> --- a/arch/loongarch/Kconfig.debug
> +++ b/arch/loongarch/Kconfig.debug
> @@ -26,4 +26,15 @@ config UNWINDER_PROLOGUE
>           Some of the addresses it reports may be incorrect (but better than the
>           Guess unwinder).
>
> +config UNWINDER_ORC
> +       bool "ORC unwinder"
> +       select OBJTOOL
> +       help
> +         This option enables the ORC (Oops Rewind Capability) unwinder for
> +         unwinding kernel stack traces.  It uses a custom data format which is
> +         a simplified version of the DWARF Call Frame Information standard.
> +
> +         Enabling this option will increase the kernel's runtime memory usage
> +         by roughly 2-4MB, depending on your kernel config.
> +
>  endchoice
> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> index 204b94b2e6aa..1f0e41f8ab61 100644
> --- a/arch/loongarch/Makefile
> +++ b/arch/loongarch/Makefile
> @@ -25,6 +25,18 @@ endif
>  32bit-emul             = elf32loongarch
>  64bit-emul             = elf64loongarch
>
> +ifdef CONFIG_UNWINDER_ORC
> +orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h
> +orc_hash_sh := $(srctree)/scripts/orc_hash.sh
> +targets += $(orc_hash_h)
> +quiet_cmd_orc_hash = GEN     $@
> +      cmd_orc_hash = mkdir -p $(dir $@); \
> +                    $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@
> +$(orc_hash_h): $(srctree)/arch/loongarch/include/asm/orc_types.h $(orc_hash_sh) FORCE
> +       $(call if_changed,orc_hash)
> +archprepare: $(orc_hash_h)
> +endif
> +
>  ifdef CONFIG_DYNAMIC_FTRACE
>  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
>  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
> @@ -81,6 +93,13 @@ KBUILD_AFLAGS_MODULE         += -Wa,-mla-global-with-abs
>  KBUILD_CFLAGS_MODULE           += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>  endif
>
> +KBUILD_AFLAGS                  += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
> +KBUILD_CFLAGS                  += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
> +
> +ifdef CONFIG_OBJTOOL
> +KBUILD_CFLAGS                  += -fno-optimize-sibling-calls -fno-jump-tables -falign-functions=4 -falign-labels=4
> +endif
> +
>  ifeq ($(CONFIG_RELOCATABLE),y)
>  KBUILD_CFLAGS_KERNEL           += -fPIE
>  LDFLAGS_vmlinux                        += -static -pie --no-dynamic-linker -z notext
> diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
> index 93783fa24f6e..a97c0edbb866 100644
> --- a/arch/loongarch/include/asm/Kbuild
> +++ b/arch/loongarch/include/asm/Kbuild
> @@ -1,4 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0
> +generated-y += orc_hash.h
> +
>  generic-y += dma-contiguous.h
>  generic-y += mcs_spinlock.h
>  generic-y += parport.h
> diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h
> index d4ca3ba25418..08388876ade4 100644
> --- a/arch/loongarch/include/asm/bug.h
> +++ b/arch/loongarch/include/asm/bug.h
> @@ -44,6 +44,7 @@
>  do {                                                           \
>         instrumentation_begin();                                \
>         __BUG_FLAGS(BUGFLAG_WARNING|(flags));                   \
> +       annotate_reachable();                                   \
>         instrumentation_end();                                  \
>  } while (0)
>
> diff --git a/arch/loongarch/include/asm/exception.h b/arch/loongarch/include/asm/exception.h
> index af74a3fdcad1..c6d20736fd92 100644
> --- a/arch/loongarch/include/asm/exception.h
> +++ b/arch/loongarch/include/asm/exception.h
> @@ -6,6 +6,8 @@
>  #include <asm/ptrace.h>
>  #include <linux/kprobes.h>
>
> +extern void *exception_table[];
> +
>  void show_registers(struct pt_regs *regs);
>
>  asmlinkage void cache_parity_error(void);
> diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
> index 2ecd82bb64e1..f33f3fd32ecc 100644
> --- a/arch/loongarch/include/asm/module.h
> +++ b/arch/loongarch/include/asm/module.h
> @@ -6,6 +6,7 @@
>  #define _ASM_MODULE_H
>
>  #include <asm/inst.h>
> +#include <asm/orc_types.h>
>  #include <asm-generic/module.h>
>
>  #define RELA_STACK_DEPTH 16
> @@ -21,6 +22,12 @@ struct mod_arch_specific {
>         struct mod_section plt;
>         struct mod_section plt_idx;
>
> +#ifdef CONFIG_UNWINDER_ORC
> +       unsigned int num_orcs;
> +       int *orc_unwind_ip;
> +       struct orc_entry *orc_unwind;
> +#endif
> +
>         /* For CONFIG_DYNAMIC_FTRACE */
>         struct plt_entry *ftrace_trampolines;
>  };
> diff --git a/arch/loongarch/include/asm/orc_header.h b/arch/loongarch/include/asm/orc_header.h
> new file mode 100644
> index 000000000000..f9d509c3fd70
> --- /dev/null
> +++ b/arch/loongarch/include/asm/orc_header.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +#ifndef _ORC_HEADER_H
> +#define _ORC_HEADER_H
> +
> +#include <linux/types.h>
> +#include <linux/compiler.h>
> +#include <asm/orc_hash.h>
> +
> +/*
> + * The header is currently a 20-byte hash of the ORC entry definition; see
> + * scripts/orc_hash.sh.
> + */
> +#define ORC_HEADER                                     \
> +       __used __section(".orc_header") __aligned(4)    \
> +       static const u8 orc_header[] = { ORC_HASH }
> +
> +#endif /* _ORC_HEADER_H */
> diff --git a/arch/loongarch/include/asm/orc_lookup.h b/arch/loongarch/include/asm/orc_lookup.h
> new file mode 100644
> index 000000000000..b02e6357def4
> --- /dev/null
> +++ b/arch/loongarch/include/asm/orc_lookup.h
> @@ -0,0 +1,31 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#ifndef _ORC_LOOKUP_H
> +#define _ORC_LOOKUP_H
> +
> +/*
> + * This is a lookup table for speeding up access to the .orc_unwind table.
> + * Given an input address offset, the corresponding lookup table entry
> + * specifies a subset of the .orc_unwind table to search.
> + *
> + * Each block represents the end of the previous range and the start of the
> + * next range.  An extra block is added to give the last range an end.
> + *
> + * The block size should be a power of 2 to avoid a costly 'div' instruction.
> + *
> + * A block size of 256 was chosen because it roughly doubles unwinder
> + * performance while only adding ~5% to the ORC data footprint.
> + */
> +#define LOOKUP_BLOCK_ORDER     8
> +#define LOOKUP_BLOCK_SIZE      (1 << LOOKUP_BLOCK_ORDER)
> +
> +#ifndef LINKER_SCRIPT
> +
> +extern unsigned int orc_lookup[];
> +extern unsigned int orc_lookup_end[];
> +
> +#define LOOKUP_START_IP                (unsigned long)_stext
> +#define LOOKUP_STOP_IP         (unsigned long)_etext
> +
> +#endif /* LINKER_SCRIPT */
> +
> +#endif /* _ORC_LOOKUP_H */
> diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h
> new file mode 100644
> index 000000000000..caf1f71a1057
> --- /dev/null
> +++ b/arch/loongarch/include/asm/orc_types.h
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#ifndef _ORC_TYPES_H
> +#define _ORC_TYPES_H
> +
> +#include <linux/types.h>
> +
> +/*
> + * The ORC_REG_* registers are base registers which are used to find other
> + * registers on the stack.
> + *
> + * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
> + * address of the previous frame: the caller's SP before it called the current
> + * function.
> + *
> + * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
> + * the current frame.
> + *
> + * The most commonly used base registers are SP and FP -- which the previous SP
> + * is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is
> + * usually based on.
> + *
> + * The rest of the base registers are needed for special cases like entry code
> + * and GCC realigned stacks.
> + */
> +#define ORC_REG_UNDEFINED              0
> +#define ORC_REG_PREV_SP                        1
> +#define ORC_REG_SP                     2
> +#define ORC_REG_FP                     3
> +#define ORC_REG_MAX                    4
> +
> +#define ORC_TYPE_UNDEFINED             0
> +#define ORC_TYPE_END_OF_STACK          1
> +#define ORC_TYPE_CALL                  2
> +#define ORC_TYPE_REGS                  3
> +#define ORC_TYPE_REGS_PARTIAL          4
> +
> +#ifndef __ASSEMBLY__
> +/*
> + * This struct is more or less a vastly simplified version of the DWARF Call
> + * Frame Information standard.  It contains only the necessary parts of DWARF
> + * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
> + * unwinder how to find the previous SP and FP (and sometimes entry regs) on
> + * the stack for a given code address.  Each instance of the struct corresponds
> + * to one or more code locations.
> + */
> +struct orc_entry {
> +       s16             sp_offset;
> +       s16             fp_offset;
> +       s16             ra_offset;
> +       unsigned int    sp_reg:4;
> +       unsigned int    fp_reg:4;
> +       unsigned int    ra_reg:4;
> +       unsigned int    type:3;
> +       unsigned int    signal:1;
> +};
> +#endif /* __ASSEMBLY__ */
> +
> +#endif /* _ORC_TYPES_H */
> diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
> index 4fb1e6408b98..45b507a7b06f 100644
> --- a/arch/loongarch/include/asm/stackframe.h
> +++ b/arch/loongarch/include/asm/stackframe.h
> @@ -13,6 +13,7 @@
>  #include <asm/asm-offsets.h>
>  #include <asm/loongarch.h>
>  #include <asm/thread_info.h>
> +#include <asm/unwind_hints.h>
>
>  /* Make the addition of cfi info a little easier. */
>         .macro cfi_rel_offset reg offset=0 docfi=0
> @@ -162,6 +163,7 @@
>         li.w    t0, CSR_CRMD_WE
>         csrxchg t0, t0, LOONGARCH_CSR_CRMD
>  #endif
> +       UNWIND_HINT_REGS
>         .endm
>
>         .macro  SAVE_ALL docfi=0
> @@ -219,6 +221,7 @@
>
>         .macro  RESTORE_SP_AND_RET docfi=0
>         cfi_ld  sp, PT_R3, \docfi
> +       UNWIND_HINT_FUNC
>         ertn
>         .endm
>
> diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
> index b9dce87afd2e..c7f52d406cce 100644
> --- a/arch/loongarch/include/asm/unwind.h
> +++ b/arch/loongarch/include/asm/unwind.h
> @@ -16,6 +16,7 @@
>  enum unwinder_type {
>         UNWINDER_GUESS,
>         UNWINDER_PROLOGUE,
> +       UNWINDER_ORC,
>  };
>
>  struct unwind_state {
> @@ -24,7 +25,7 @@ struct unwind_state {
>         struct task_struct *task;
>         bool first, error, reset;
>         int graph_idx;
> -       unsigned long sp, pc, ra;
> +       unsigned long sp, fp, pc, ra;
>  };
>
>  bool default_next_frame(struct unwind_state *state);
> @@ -34,6 +35,14 @@ void unwind_start(struct unwind_state *state,
>  bool unwind_next_frame(struct unwind_state *state);
>  unsigned long unwind_get_return_address(struct unwind_state *state);
>
> +#ifdef CONFIG_UNWINDER_ORC
> +void unwind_init(void);
> +void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size);
> +#else
> +static inline void unwind_init(void) {}
> +static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {}
> +#endif
> +
>  static inline bool unwind_done(struct unwind_state *state)
>  {
>         return state->stack_info.type == STACK_TYPE_UNKNOWN;
> @@ -61,14 +70,17 @@ static __always_inline void __unwind_start(struct unwind_state *state,
>                 state->sp = regs->regs[3];
>                 state->pc = regs->csr_era;
>                 state->ra = regs->regs[1];
> +               state->fp = regs->regs[22];
>         } else if (task && task != current) {
>                 state->sp = thread_saved_fp(task);
>                 state->pc = thread_saved_ra(task);
>                 state->ra = 0;
> +               state->fp = 0;
>         } else {
>                 state->sp = (unsigned long)__builtin_frame_address(0);
>                 state->pc = (unsigned long)__builtin_return_address(0);
>                 state->ra = 0;
> +               state->fp = 0;
>         }
>         state->task = task;
>         get_stack_info(state->sp, state->task, &state->stack_info);
> @@ -77,6 +89,9 @@ static __always_inline void __unwind_start(struct unwind_state *state,
>
>  static __always_inline unsigned long __unwind_get_return_address(struct unwind_state *state)
>  {
> -       return unwind_done(state) ? 0 : state->pc;
> +       if (unwind_done(state))
> +               return 0;
> +
> +       return __kernel_text_address(state->pc) ? state->pc : 0;
>  }
>  #endif /* _ASM_UNWIND_H */
> diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h
> new file mode 100644
> index 000000000000..82443fed3167
> --- /dev/null
> +++ b/arch/loongarch/include/asm/unwind_hints.h
> @@ -0,0 +1,28 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_LOONGARCH_UNWIND_HINTS_H
> +#define _ASM_LOONGARCH_UNWIND_HINTS_H
> +
> +#include <linux/objtool.h>
> +#include <asm/orc_types.h>
> +
> +#ifdef __ASSEMBLY__
> +
> +.macro UNWIND_HINT_UNDEFINED
> +       UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED
> +.endm
> +
> +.macro UNWIND_HINT_EMPTY
> +       UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL
> +.endm
> +
> +.macro UNWIND_HINT_REGS
> +       UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_REGS
> +.endm
> +
> +.macro UNWIND_HINT_FUNC
> +       UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL
> +.endm
> +
> +#endif /* __ASSEMBLY__ */
> +
> +#endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index 4fcc168f0732..ac47e11c227e 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -3,6 +3,8 @@
>  # Makefile for the Linux/LoongArch kernel.
>  #
>
> +OBJECT_FILES_NON_STANDARD_head.o := y
> +
>  extra-y                := vmlinux.lds
>
>  obj-y          += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
> @@ -21,6 +23,7 @@ obj-$(CONFIG_ARCH_STRICT_ALIGN)       += unaligned.o
>
>  CFLAGS_module.o                += $(call cc-option,-Wno-override-init,)
>  CFLAGS_syscall.o       += $(call cc-option,-Wno-override-init,)
> +CFLAGS_traps.o         += $(call cc-option,-Wno-override-init,)
>  CFLAGS_perf_event.o    += $(call cc-option,-Wno-override-init,)
>
>  ifdef CONFIG_FUNCTION_TRACER
> @@ -62,6 +65,7 @@ obj-$(CONFIG_CRASH_DUMP)      += crash_dump.o
>
>  obj-$(CONFIG_UNWINDER_GUESS)   += unwind_guess.o
>  obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
> +obj-$(CONFIG_UNWINDER_ORC)     += unwind_orc.o
>
>  obj-$(CONFIG_PERF_EVENTS)      += perf_event.o perf_regs.o
>  obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
> diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
> index 1ec8e4c4cc2b..2f5c74d26d5f 100644
> --- a/arch/loongarch/kernel/entry.S
> +++ b/arch/loongarch/kernel/entry.S
> @@ -14,11 +14,12 @@
>  #include <asm/regdef.h>
>  #include <asm/stackframe.h>
>  #include <asm/thread_info.h>
> +#include <asm/unwind_hints.h>
>
>         .text
>         .cfi_sections   .debug_frame
> -       .align  5
>  SYM_CODE_START(handle_syscall)
> +       UNWIND_HINT_UNDEFINED
>         csrrd           t0, PERCPU_BASE_KS
>         la.pcrel        t1, kernelsp
>         add.d           t1, t1, t0
> @@ -57,6 +58,7 @@ SYM_CODE_START(handle_syscall)
>         cfi_st          fp, PT_R22
>
>         SAVE_STATIC
> +       UNWIND_HINT_REGS
>
>  #ifdef CONFIG_KGDB
>         li.w            t1, CSR_CRMD_WE
> @@ -75,6 +77,7 @@ SYM_CODE_END(handle_syscall)
>  _ASM_NOKPROBE(handle_syscall)
>
>  SYM_CODE_START(ret_from_fork)
> +       UNWIND_HINT_REGS
>         bl              schedule_tail           # a0 = struct task_struct *prev
>         move            a0, sp
>         bl              syscall_exit_to_user_mode
> @@ -84,6 +87,7 @@ SYM_CODE_START(ret_from_fork)
>  SYM_CODE_END(ret_from_fork)
>
>  SYM_CODE_START(ret_from_kernel_thread)
> +       UNWIND_HINT_REGS
>         bl              schedule_tail           # a0 = struct task_struct *prev
>         move            a0, s1
>         jirl            ra, s0, 0
> diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
> index d53ab10f4644..487933899c5d 100644
> --- a/arch/loongarch/kernel/fpu.S
> +++ b/arch/loongarch/kernel/fpu.S
> @@ -15,6 +15,7 @@
>  #include <asm/fpregdef.h>
>  #include <asm/loongarch.h>
>  #include <asm/regdef.h>
> +#include <asm/unwind_hints.h>
>
>  #define FPU_REG_WIDTH          8
>  #define LSX_REG_WIDTH          16
> @@ -524,3 +525,9 @@ SYM_FUNC_END(_restore_lasx_context)
>  .L_fpu_fault:
>         li.w    a0, -EFAULT                             # failure
>         jr      ra
> +
> +#ifdef CONFIG_CPU_HAS_LBT
> +STACK_FRAME_NON_STANDARD _restore_fp
> +STACK_FRAME_NON_STANDARD _restore_lsx
> +STACK_FRAME_NON_STANDARD _restore_lasx
> +#endif
> diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
> index 2bb3aa2dcfcb..6b7ec9f1134d 100644
> --- a/arch/loongarch/kernel/genex.S
> +++ b/arch/loongarch/kernel/genex.S
> @@ -16,7 +16,6 @@
>  #include <asm/stackframe.h>
>  #include <asm/thread_info.h>
>
> -       .align  5
>  SYM_FUNC_START(__arch_cpu_idle)
>         /* start of rollback region */
>         LONG_L  t0, tp, TI_FLAGS
> @@ -32,6 +31,7 @@ SYM_FUNC_START(__arch_cpu_idle)
>  SYM_FUNC_END(__arch_cpu_idle)
>
>  SYM_CODE_START(handle_vint)
> +       UNWIND_HINT_UNDEFINED
>         BACKUP_T0T1
>         SAVE_ALL
>         la_abs  t1, __arch_cpu_idle
> @@ -49,6 +49,7 @@ SYM_CODE_START(handle_vint)
>  SYM_CODE_END(handle_vint)
>
>  SYM_CODE_START(except_vec_cex)
> +       UNWIND_HINT_UNDEFINED
>         b       cache_parity_error
>  SYM_CODE_END(except_vec_cex)
>
> @@ -65,8 +66,8 @@ SYM_CODE_END(except_vec_cex)
>         .endm
>
>         .macro  BUILD_HANDLER exception handler prep
> -       .align  5
>         SYM_CODE_START(handle_\exception)
> +       UNWIND_HINT_UNDEFINED
>         666:
>         BACKUP_T0T1
>         SAVE_ALL
> @@ -94,6 +95,7 @@ SYM_CODE_END(except_vec_cex)
>         BUILD_HANDLER reserved reserved none    /* others */
>
>  SYM_CODE_START(handle_sys)
> +       UNWIND_HINT_UNDEFINED
>         la_abs  t0, handle_syscall
>         jr      t0
>  SYM_CODE_END(handle_sys)
> diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
> index 9c75120a26d8..4d6914f84c7e 100644
> --- a/arch/loongarch/kernel/lbt.S
> +++ b/arch/loongarch/kernel/lbt.S
> @@ -11,6 +11,7 @@
>  #include <asm/asm-offsets.h>
>  #include <asm/errno.h>
>  #include <asm/regdef.h>
> +#include <asm/unwind_hints.h>
>
>  #define SCR_REG_WIDTH 8
>
> @@ -153,3 +154,7 @@ SYM_FUNC_END(_restore_ftop_context)
>  .L_lbt_fault:
>         li.w            a0, -EFAULT             # failure
>         jr              ra
> +
> +#ifdef CONFIG_CPU_HAS_LBT
> +STACK_FRAME_NON_STANDARD _restore_ftop_context
> +#endif
> diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
> index b13b2858fe39..c7d0338d12c1 100644
> --- a/arch/loongarch/kernel/module.c
> +++ b/arch/loongarch/kernel/module.c
> @@ -20,6 +20,7 @@
>  #include <linux/kernel.h>
>  #include <asm/alternative.h>
>  #include <asm/inst.h>
> +#include <asm/unwind.h>
>
>  static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top)
>  {
> @@ -515,15 +516,28 @@ static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
>  int module_finalize(const Elf_Ehdr *hdr,
>                     const Elf_Shdr *sechdrs, struct module *mod)
>  {
> -       const Elf_Shdr *s, *se;
>         const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
> +       const Elf_Shdr *s, *alt = NULL, *orc = NULL, *orc_ip = NULL, *ftrace = NULL;
>
> -       for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
> +       for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
>                 if (!strcmp(".altinstructions", secstrs + s->sh_name))
> -                       apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size);
> +                       alt = s;
> +               if (!strcmp(".orc_unwind", secstrs + s->sh_name))
> +                       orc = s;
> +               if (!strcmp(".orc_unwind_ip", secstrs + s->sh_name))
> +                       orc_ip = s;
>                 if (!strcmp(".ftrace_trampoline", secstrs + s->sh_name))
> -                       module_init_ftrace_plt(hdr, s, mod);
> +                       ftrace = s;
>         }
>
> +       if (alt)
> +               apply_alternatives((void *)alt->sh_addr, (void *)alt->sh_addr + alt->sh_size);
> +
> +       if (orc && orc_ip)
> +               unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, (void *)orc->sh_addr, orc->sh_size);
> +
> +       if (ftrace)
> +               module_init_ftrace_plt(hdr, ftrace, mod);
> +
>         return 0;
>  }
> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> index f49f6b053763..bcc191d278c1 100644
> --- a/arch/loongarch/kernel/relocate_kernel.S
> +++ b/arch/loongarch/kernel/relocate_kernel.S
> @@ -15,6 +15,7 @@
>  #include <asm/addrspace.h>
>
>  SYM_CODE_START(relocate_new_kernel)
> +       UNWIND_HINT_UNDEFINED
>         /*
>          * a0: EFI boot flag for the new kernel
>          * a1: Command line pointer for the new kernel
> @@ -90,6 +91,7 @@ SYM_CODE_END(relocate_new_kernel)
>   * then start at the entry point from LOONGARCH_IOCSR_MBUF0.
>   */
>  SYM_CODE_START(kexec_smp_wait)
> +       UNWIND_HINT_UNDEFINED
>  1:     li.w            t0, 0x100                       /* wait for init loop */
>  2:     addi.w          t0, t0, -1                      /* limit mailbox access */
>         bnez            t0, 2b
> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> index d183a745fb85..ec4459c61db6 100644
> --- a/arch/loongarch/kernel/setup.c
> +++ b/arch/loongarch/kernel/setup.c
> @@ -47,6 +47,7 @@
>  #include <asm/sections.h>
>  #include <asm/setup.h>
>  #include <asm/time.h>
> +#include <asm/unwind.h>
>
>  #define SMBIOS_BIOSSIZE_OFFSET         0x09
>  #define SMBIOS_BIOSEXTERN_OFFSET       0x13
> @@ -605,6 +606,7 @@ static void __init prefill_possible_map(void)
>  void __init setup_arch(char **cmdline_p)
>  {
>         cpu_probe();
> +       unwind_init();
>
>         init_environ();
>         efi_init();
> diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
> index 92270f14db94..9848d427cbfa 100644
> --- a/arch/loongarch/kernel/stacktrace.c
> +++ b/arch/loongarch/kernel/stacktrace.c
> @@ -29,6 +29,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
>                         regs->csr_era = thread_saved_ra(task);
>                 }
>                 regs->regs[1] = 0;
> +               regs->regs[22] = 0;
>         }
>
>         for (unwind_start(&state, task, regs);
> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> index aebfc3733a76..f9f4eb00c92e 100644
> --- a/arch/loongarch/kernel/traps.c
> +++ b/arch/loongarch/kernel/traps.c
> @@ -53,6 +53,32 @@
>
>  #include "access-helper.h"
>
> +void *exception_table[EXCCODE_INT_START] = {
> +       [0 ... EXCCODE_INT_START - 1] = handle_reserved,
> +
> +       [EXCCODE_TLBI]          = handle_tlb_load,
> +       [EXCCODE_TLBL]          = handle_tlb_load,
> +       [EXCCODE_TLBS]          = handle_tlb_store,
> +       [EXCCODE_TLBM]          = handle_tlb_modify,
> +       [EXCCODE_TLBNR]         = handle_tlb_protect,
> +       [EXCCODE_TLBNX]         = handle_tlb_protect,
> +       [EXCCODE_TLBPE]         = handle_tlb_protect,
> +       [EXCCODE_ADE]           = handle_ade,
> +       [EXCCODE_ALE]           = handle_ale,
> +       [EXCCODE_BCE]           = handle_bce,
> +       [EXCCODE_SYS]           = handle_sys,
> +       [EXCCODE_BP]            = handle_bp,
> +       [EXCCODE_INE]           = handle_ri,
> +       [EXCCODE_IPE]           = handle_ri,
> +       [EXCCODE_FPDIS]         = handle_fpu,
> +       [EXCCODE_LSXDIS]        = handle_lsx,
> +       [EXCCODE_LASXDIS]       = handle_lasx,
> +       [EXCCODE_FPE]           = handle_fpe,
> +       [EXCCODE_WATCH]         = handle_watch,
> +       [EXCCODE_BTDIS]         = handle_lbt,
> +};
> +EXPORT_SYMBOL_GPL(exception_table);
> +
>  static void show_backtrace(struct task_struct *task, const struct pt_regs *regs,
>                            const char *loglvl, bool user)
>  {
> @@ -1150,19 +1176,9 @@ void __init trap_init(void)
>         for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++)
>                 set_handler(i * VECSIZE, handle_vint, VECSIZE);
>
> -       set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE);
> -       set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE);
> -       set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE);
> -       set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE);
> -       set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE);
> -       set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE);
> -       set_handler(EXCCODE_IPE * VECSIZE, handle_ri, VECSIZE);
> -       set_handler(EXCCODE_FPDIS * VECSIZE, handle_fpu, VECSIZE);
> -       set_handler(EXCCODE_LSXDIS * VECSIZE, handle_lsx, VECSIZE);
> -       set_handler(EXCCODE_LASXDIS * VECSIZE, handle_lasx, VECSIZE);
> -       set_handler(EXCCODE_FPE * VECSIZE, handle_fpe, VECSIZE);
> -       set_handler(EXCCODE_BTDIS * VECSIZE, handle_lbt, VECSIZE);
> -       set_handler(EXCCODE_WATCH * VECSIZE, handle_watch, VECSIZE);
> +       /* Set exception vector handler */
> +       for (i = EXCCODE_ADE; i <= EXCCODE_BTDIS; i++)
> +               set_handler(i * VECSIZE, exception_table[i], VECSIZE);
>
>         cache_error_setup();
>
> diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c
> new file mode 100644
> index 000000000000..15f18d1e0dad
> --- /dev/null
> +++ b/arch/loongarch/kernel/unwind_orc.c
> @@ -0,0 +1,516 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#include <linux/objtool.h>
> +#include <linux/module.h>
> +#include <linux/sort.h>
> +#include <asm/exception.h>
> +#include <asm/orc_header.h>
> +#include <asm/orc_lookup.h>
> +#include <asm/orc_types.h>
> +#include <asm/ptrace.h>
> +#include <asm/setup.h>
> +#include <asm/stacktrace.h>
> +#include <asm/tlb.h>
> +#include <asm/unwind.h>
> +
> +ORC_HEADER;
> +
> +#define orc_warn(fmt, ...) \
> +       printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
> +
> +extern int __start_orc_unwind_ip[];
> +extern int __stop_orc_unwind_ip[];
> +extern struct orc_entry __start_orc_unwind[];
> +extern struct orc_entry __stop_orc_unwind[];
> +
> +static bool orc_init __ro_after_init;
> +static unsigned int lookup_num_blocks __ro_after_init;
> +
> +/* Fake frame pointer entry -- used as a fallback for generated code */
> +static struct orc_entry orc_fp_entry = {
> +       .sp_reg         = ORC_REG_FP,
> +       .sp_offset      = 16,
> +       .fp_reg         = ORC_REG_PREV_SP,
> +       .fp_offset      = -16,
> +       .ra_reg         = ORC_REG_PREV_SP,
> +       .ra_offset      = -8,
> +       .type           = ORC_TYPE_CALL
> +};
> +
> +/*
> + * If we crash with IP==0, the last successfully executed instruction
> + * was probably an indirect function call with a NULL function pointer,
> + * and we don't have unwind information for NULL.
> + * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function
> + * pointer into its parent and then continue normally from there.
> + */
> +static struct orc_entry orc_null_entry = {
> +       .sp_reg         = ORC_REG_SP,
> +       .sp_offset      = sizeof(long),
> +       .fp_reg         = ORC_REG_UNDEFINED,
> +       .type           = ORC_TYPE_CALL
> +};
> +
> +static inline unsigned long orc_ip(const int *ip)
> +{
> +       return (unsigned long)ip + *ip;
> +}
> +
> +static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
> +                                   unsigned int num_entries, unsigned long ip)
> +{
> +       int *first = ip_table;
> +       int *mid = first, *found = first;
> +       int *last = ip_table + num_entries - 1;
> +
> +       if (!num_entries)
> +               return NULL;
> +
> +       /*
> +        * Do a binary range search to find the rightmost duplicate of a given
> +        * starting address.  Some entries are section terminators which are
> +        * "weak" entries for ensuring there are no gaps.  They should be
> +        * ignored when they conflict with a real entry.
> +        */
> +       while (first <= last) {
> +               mid = first + ((last - first) / 2);
> +
> +               if (orc_ip(mid) <= ip) {
> +                       found = mid;
> +                       first = mid + 1;
> +               } else
> +                       last = mid - 1;
> +       }
> +
> +       return u_table + (found - ip_table);
> +}
> +
> +#ifdef CONFIG_MODULES
> +static struct orc_entry *orc_module_find(unsigned long ip)
> +{
> +       struct module *mod;
> +
> +       mod = __module_address(ip);
> +       if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip)
> +               return NULL;
> +
> +       return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind, mod->arch.num_orcs, ip);
> +}
> +#else
> +static struct orc_entry *orc_module_find(unsigned long ip)
> +{
> +       return NULL;
> +}
> +#endif
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +static struct orc_entry *orc_find(unsigned long ip);
> +
> +/*
> + * Ftrace dynamic trampolines do not have orc entries of their own.
> + * But they are copies of the ftrace entries that are static and
> + * defined in ftrace_*.S, which do have orc entries.
> + *
> + * If the unwinder comes across a ftrace trampoline, then find the
> + * ftrace function that was used to create it, and use that ftrace
> + * function's orc entry, as the placement of the return code in
> + * the stack will be identical.
> + */
> +static struct orc_entry *orc_ftrace_find(unsigned long ip)
> +{
> +       struct ftrace_ops *ops;
> +       unsigned long tramp_addr, offset;
> +
> +       ops = ftrace_ops_trampoline(ip);
> +       if (!ops)
> +               return NULL;
> +
> +       /* Set tramp_addr to the start of the code copied by the trampoline */
> +       if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
> +               tramp_addr = (unsigned long)ftrace_regs_caller;
> +       else
> +               tramp_addr = (unsigned long)ftrace_caller;
> +
> +       /* Now place tramp_addr to the location within the trampoline ip is at */
> +       offset = ip - ops->trampoline;
> +       tramp_addr += offset;
> +
> +       /* Prevent unlikely recursion */
> +       if (ip == tramp_addr)
> +               return NULL;
> +
> +       return orc_find(tramp_addr);
> +}
> +#else
> +static struct orc_entry *orc_ftrace_find(unsigned long ip)
> +{
> +       return NULL;
> +}
> +#endif
> +
> +static struct orc_entry *orc_find(unsigned long ip)
> +{
> +       static struct orc_entry *orc;
> +
> +       if (ip == 0)
> +               return &orc_null_entry;
> +
> +       /* For non-init vmlinux addresses, use the fast lookup table: */
> +       if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
> +               unsigned int idx, start, stop;
> +
> +               idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
> +
> +               if (unlikely((idx >= lookup_num_blocks-1))) {
> +                       orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
> +                                idx, lookup_num_blocks, (void *)ip);
> +                       return NULL;
> +               }
> +
> +               start = orc_lookup[idx];
> +               stop = orc_lookup[idx + 1] + 1;
> +
> +               if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
> +                            (__start_orc_unwind + stop > __stop_orc_unwind))) {
> +                       orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
> +                                idx, lookup_num_blocks, start, stop, (void *)ip);
> +                       return NULL;
> +               }
> +
> +               return __orc_find(__start_orc_unwind_ip + start,
> +                                 __start_orc_unwind + start, stop - start, ip);
> +       }
> +
> +       /* vmlinux .init slow lookup: */
> +       if (is_kernel_inittext(ip))
> +               return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
> +                                 __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
> +
> +       /* Module lookup: */
> +       orc = orc_module_find(ip);
> +       if (orc)
> +               return orc;
> +
> +       return orc_ftrace_find(ip);
> +}
> +
> +#ifdef CONFIG_MODULES
> +
> +static DEFINE_MUTEX(sort_mutex);
> +static int *cur_orc_ip_table = __start_orc_unwind_ip;
> +static struct orc_entry *cur_orc_table = __start_orc_unwind;
> +
> +static void orc_sort_swap(void *_a, void *_b, int size)
> +{
> +       int delta = _b - _a;
> +       int *a = _a, *b = _b, tmp;
> +       struct orc_entry *orc_a, *orc_b;
> +
> +       /* Swap the .orc_unwind_ip entries: */
> +       tmp = *a;
> +       *a = *b + delta;
> +       *b = tmp - delta;
> +
> +       /* Swap the corresponding .orc_unwind entries: */
> +       orc_a = cur_orc_table + (a - cur_orc_ip_table);
> +       orc_b = cur_orc_table + (b - cur_orc_ip_table);
> +       swap(*orc_a, *orc_b);
> +}
> +
> +static int orc_sort_cmp(const void *_a, const void *_b)
> +{
> +       const int *a = _a, *b = _b;
> +       unsigned long a_val = orc_ip(a);
> +       unsigned long b_val = orc_ip(b);
> +       struct orc_entry *orc_a;
> +
> +       if (a_val > b_val)
> +               return 1;
> +       if (a_val < b_val)
> +               return -1;
> +
> +       /*
> +        * The "weak" section terminator entries need to always be first
> +        * to ensure the lookup code skips them in favor of real entries.
> +        * These terminator entries exist to handle any gaps created by
> +        * whitelisted .o files which didn't get objtool generation.
> +        */
> +       orc_a = cur_orc_table + (a - cur_orc_ip_table);
> +
> +       return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1;
> +}
> +
> +void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
> +                       void *_orc, size_t orc_size)
> +{
> +       int *orc_ip = _orc_ip;
> +       struct orc_entry *orc = _orc;
> +       unsigned int num_entries = orc_ip_size / sizeof(int);
> +
> +       WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 ||
> +                    orc_size % sizeof(*orc) != 0 ||
> +                    num_entries != orc_size / sizeof(*orc));
> +
> +       /*
> +        * The 'cur_orc_*' globals allow the orc_sort_swap() callback to
> +        * associate an .orc_unwind_ip table entry with its corresponding
> +        * .orc_unwind entry so they can both be swapped.
> +        */
> +       mutex_lock(&sort_mutex);
> +       cur_orc_ip_table = orc_ip;
> +       cur_orc_table = orc;
> +       sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap);
> +       mutex_unlock(&sort_mutex);
> +
> +       mod->arch.orc_unwind_ip = orc_ip;
> +       mod->arch.orc_unwind = orc;
> +       mod->arch.num_orcs = num_entries;
> +}
> +#endif
> +
> +void __init unwind_init(void)
> +{
> +       int i;
> +       size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
> +       size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
> +       size_t num_entries = orc_ip_size / sizeof(int);
> +       struct orc_entry *orc;
> +
> +       if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
> +           orc_size % sizeof(struct orc_entry) != 0 ||
> +           num_entries != orc_size / sizeof(struct orc_entry)) {
> +               orc_warn("WARNING: Bad or missing .orc_unwind table.  Disabling unwinder.\n");
> +               return;
> +       }
> +
> +       /*
> +        * Note, the orc_unwind and orc_unwind_ip tables were already
> +        * sorted at build time via the 'sorttable' tool.
> +        * It's ready for binary search straight away, no need to sort it.
> +        */
> +
> +       /* Initialize the fast lookup table: */
> +       lookup_num_blocks = orc_lookup_end - orc_lookup;
> +       for (i = 0; i < lookup_num_blocks-1; i++) {
> +               orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
> +                                num_entries, LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
> +               if (!orc) {
> +                       orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
> +                       return;
> +               }
> +
> +               orc_lookup[i] = orc - __start_orc_unwind;
> +       }
> +
> +       /* Initialize the ending block: */
> +       orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, LOOKUP_STOP_IP);
> +       if (!orc) {
> +               orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
> +               return;
> +       }
> +       orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
> +
> +       orc_init = true;
> +}
> +
> +static inline bool on_stack(struct stack_info *info, unsigned long addr, size_t len)
> +{
> +       unsigned long begin = info->begin;
> +       unsigned long end   = info->end;
> +
> +       return (info->type != STACK_TYPE_UNKNOWN &&
> +               addr >= begin && addr < end && addr + len > begin && addr + len <= end);
> +}
> +
> +static bool stack_access_ok(struct unwind_state *state, unsigned long addr, size_t len)
> +{
> +       struct stack_info *info = &state->stack_info;
> +
> +       if (on_stack(info, addr, len))
> +               return true;
> +
> +       return !get_stack_info(addr, state->task, info) && on_stack(info, addr, len);
> +}
> +
> +unsigned long unwind_get_return_address(struct unwind_state *state)
> +{
> +       return __unwind_get_return_address(state);
> +}
> +EXPORT_SYMBOL_GPL(unwind_get_return_address);
> +
> +void unwind_start(struct unwind_state *state, struct task_struct *task,
> +                   struct pt_regs *regs)
> +{
> +       __unwind_start(state, task, regs);
> +       state->type = UNWINDER_ORC;
> +       if (!unwind_done(state) && !__kernel_text_address(state->pc))
> +               unwind_next_frame(state);
> +}
> +EXPORT_SYMBOL_GPL(unwind_start);
> +
> +static bool is_entry_func(unsigned long addr)
> +{
> +       extern u32 kernel_entry;
> +       extern u32 kernel_entry_end;
> +
> +       return addr >= (unsigned long)&kernel_entry && addr < (unsigned long)&kernel_entry_end;
> +}
> +
> +static inline unsigned long bt_address(unsigned long ra)
> +{
> +       extern unsigned long eentry;
> +
> +       if (__kernel_text_address(ra))
> +               return ra;
> +
> +       if (__module_text_address(ra))
> +               return ra;
> +
> +       if (ra >= eentry && ra < eentry +  EXCCODE_INT_END * VECSIZE) {
> +               unsigned long func;
> +               unsigned long type = (ra - eentry) / VECSIZE;
> +               unsigned long offset = (ra - eentry) % VECSIZE;
> +
> +               switch (type) {
> +               case 0 ... EXCCODE_INT_START - 1:
> +                       func = (unsigned long)exception_table[type];
> +                       break;
> +               case EXCCODE_INT_START ... EXCCODE_INT_END:
> +                       func = (unsigned long)handle_vint;
> +                       break;
> +               default:
> +                       func = (unsigned long)handle_reserved;
> +                       break;
> +               }
> +
> +               return func + offset;
> +       }
> +
> +       return ra;
> +}
> +
> +bool unwind_next_frame(struct unwind_state *state)
> +{
> +       unsigned long *p, pc;
> +       struct pt_regs *regs;
> +       struct orc_entry *orc;
> +       struct stack_info *info = &state->stack_info;
> +
> +       if (unwind_done(state))
> +               return false;
> +
> +       /* Don't let modules unload while we're reading their ORC data. */
> +       preempt_disable();
> +
> +       if (is_entry_func(state->pc))
> +               goto end;
> +
> +       orc = orc_find(state->pc);
> +       if (!orc) {
> +               orc = &orc_fp_entry;
> +               state->error = true;
> +       }
> +
> +       switch (orc->sp_reg) {
> +       case ORC_REG_SP:
> +               if (info->type == STACK_TYPE_IRQ && state->sp == info->end)
> +                       orc->type = ORC_TYPE_REGS;
> +               else
> +                       state->sp = state->sp + orc->sp_offset;
> +               break;
> +       case ORC_REG_FP:
> +               state->sp = state->fp;
> +               break;
> +       default:
> +               orc_warn("unknown SP base reg %d at %pB\n", orc->sp_reg, (void *)state->pc);
> +               goto err;
> +       }
> +
> +       switch (orc->fp_reg) {
> +       case ORC_REG_PREV_SP:
> +               p = (unsigned long *)(state->sp + orc->fp_offset);
> +               if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long)))
> +                       goto err;
> +
> +               state->fp = *p;
> +               break;
> +       case ORC_REG_UNDEFINED:
> +               /* Nothing. */
> +               break;
> +       default:
> +               orc_warn("unknown FP base reg %d at %pB\n", orc->fp_reg, (void *)state->pc);
> +               goto err;
> +       }
> +
> +       switch (orc->type) {
> +       case ORC_TYPE_CALL:
> +               if (orc->ra_reg == ORC_REG_PREV_SP) {
> +                       p = (unsigned long *)(state->sp + orc->ra_offset);
> +                       if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long)))
> +                               goto err;
> +
> +                       pc = unwind_graph_addr(state, *p, state->sp);
> +                       pc -= LOONGARCH_INSN_SIZE;
> +               } else if (orc->ra_reg == ORC_REG_UNDEFINED) {
> +                       if (!state->ra || state->ra == state->pc)
> +                               goto err;
> +
> +                       pc = unwind_graph_addr(state, state->ra, state->sp);
> +                       pc -=  LOONGARCH_INSN_SIZE;
> +                       state->ra = 0;
> +               } else {
> +                       orc_warn("unknown ra base reg %d at %pB\n", orc->ra_reg, (void *)state->pc);
> +                       goto err;
> +               }
> +               break;
> +       case ORC_TYPE_REGS:
> +               if (info->type == STACK_TYPE_IRQ && state->sp == info->end)
> +                       regs = (struct pt_regs *)info->next_sp;
> +               else
> +                       regs = (struct pt_regs *)state->sp;
> +
> +               if (!stack_access_ok(state, (unsigned long)regs, sizeof(*regs)))
> +                       goto err;
> +
> +               if ((info->end == (unsigned long)regs + sizeof(*regs)) &&
> +                   !regs->regs[3] && !regs->regs[1])
> +                       goto end;
> +
> +               if (user_mode(regs))
> +                       goto end;
> +
> +               pc = regs->csr_era;
> +               if (!__kernel_text_address(pc))
> +                       goto err;
> +
> +               state->sp = regs->regs[3];
> +               state->ra = regs->regs[1];
> +               state->fp = regs->regs[22];
> +               get_stack_info(state->sp, state->task, info);
> +
> +               break;
> +       default:
> +               orc_warn("unknown .orc_unwind entry type %d at %pB\n", orc->type, (void *)state->pc);
> +               goto err;
> +       }
> +
> +       state->pc = bt_address(pc);
> +       if (!state->pc) {
> +               pr_err("cannot find unwind pc at %pK\n", (void *)pc);
> +               goto err;
> +       }
> +
> +       if (!__kernel_text_address(state->pc))
> +               goto err;
> +
> +       preempt_enable();
> +       return true;
> +
> +err:
> +       state->error = true;
> +
> +end:
> +       preempt_enable();
> +       state->stack_info.type = STACK_TYPE_UNKNOWN;
> +       return false;
> +}
> +EXPORT_SYMBOL_GPL(unwind_next_frame);
> diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
> index bb2ec86f37a8..eaa7a91162e6 100644
> --- a/arch/loongarch/kernel/vmlinux.lds.S
> +++ b/arch/loongarch/kernel/vmlinux.lds.S
> @@ -2,6 +2,7 @@
>  #include <linux/sizes.h>
>  #include <asm/asm-offsets.h>
>  #include <asm/thread_info.h>
> +#include <asm/orc_lookup.h>
>
>  #define PAGE_SIZE _PAGE_SIZE
>  #define RO_EXCEPTION_TABLE_ALIGN       4
> @@ -122,6 +123,8 @@ SECTIONS
>         }
>  #endif
>
> +       ORC_UNWIND_TABLE
> +
>         .sdata : {
>                 *(.sdata)
>         }
> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
> index 0ed9040307b7..9d49c3f6fff5 100644
> --- a/arch/loongarch/kvm/switch.S
> +++ b/arch/loongarch/kvm/switch.S
> @@ -8,7 +8,7 @@
>  #include <asm/asmmacro.h>
>  #include <asm/loongarch.h>
>  #include <asm/regdef.h>
> -#include <asm/stackframe.h>
> +#include <asm/unwind_hints.h>
>
>  #define HGPR_OFFSET(x)         (PT_R0 + 8*x)
>  #define GGPR_OFFSET(x)         (KVM_ARCH_GGPR + 8*x)
> @@ -112,6 +112,7 @@
>         .text
>         .cfi_sections   .debug_frame
>  SYM_CODE_START(kvm_exc_entry)
> +       UNWIND_HINT_UNDEFINED
>         csrwr   a2,   KVM_TEMP_KS
>         csrrd   a2,   KVM_VCPU_KS
>         addi.d  a2,   a2, KVM_VCPU_ARCH
> @@ -248,3 +249,7 @@ SYM_FUNC_END(kvm_restore_fpu)
>         .section ".rodata"
>  SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry)
>  SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
> +
> +#ifdef CONFIG_CPU_HAS_LBT
> +STACK_FRAME_NON_STANDARD kvm_restore_fpu
> +#endif
> diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
> index a77bf160bfc4..e3023d9a508c 100644
> --- a/arch/loongarch/lib/Makefile
> +++ b/arch/loongarch/lib/Makefile
> @@ -3,6 +3,8 @@
>  # Makefile for LoongArch-specific library files.
>  #
>
> +OBJECT_FILES_NON_STANDARD := y
> +
>  lib-y  += delay.o memset.o memcpy.o memmove.o \
>            clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
>
I have a draft live-patch patch here:
https://github.com/chenhuacai/linux/commit/744942cbf456b320f2333638e4bd27d35900284c

But we get such an error:

# selftests: livepatch: test-ftrace.sh

                                      [48/1865]
[14936.262721] livepatch: failed to register ftrace handler for
function 'cmdline_proc_show' (-16)
# TEST: livepatch interaction with ftrace_enabled sysctl ...
[14936.345709] Kernel ade access[#1]:
[14936.349096] CPU: 1 PID: 2571 Comm: (udev-worker) Tainted: G
     K    6.7.0-rc3+ #1023 33dbce5afaee02d40ea2806811349d9b0065d5fd
[14936.361203] Hardware name: Loongson
Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS
vUDK2018-LoongArch-V2.0.0-prebeta9 10/21/2022
[14936.374515] pc 90000000047277e4 ra 9000000003b5bfcc tp
9000000120b4c000 sp 9000000120b4fcd0
[14936.382816] a0 060240e70314c08f a1 90000000002c01c1 a2
060240e70314c0e3 a3 90000000002c01d4
[14936.391116] a4 90000000002c0194 a5 060240e70314c090 a6
414d495f544f4f42 a7 7974743d656c6f73
[14936.399416] t0 47414d495f544f4f t1 6e696c6d762f3d45 t2
7220782e352d7a75 t3 7665642f3d746f6f
[14936.407717] t4 6f7220336164732f t5 78756e696c657320 t6
6f736e6f6320303d t7 30537974743d656c
[14936.416018] t8 9000000120b4fd00 u0 9000000120b4fe60 s9
0000000000000001 s0 ffff800002d90034
[14936.424318] s1 0000000000000054 s2 90000000002c0180 s3
9000000120b4fdb0 s4 9000000105d243e8
[14936.432619] s5 0000000000000000 s6 9000000120b4fe60 s7
fffffffffffff000 s8 000000007fffc000
[14936.440920]    ra: 9000000003b5bfcc seq_puts+0x54/0x78
[14936.446029]   ERA: 90000000047277e4 __memcpy_fast+0x58/0xf0
[14936.451567]  CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE)
[14936.457725]  PRMD: 00000004 (PPLV0 +PIE -PWE)
[14936.462057]  EUEN: 00000007 (+FPE +SXE +ASXE -BTE)
[14936.466822]  ECFG: 00071c1c (LIE=2-4,10-12 VS=7)
[14936.471412] ESTAT: 00480000 [ADEM] (IS= ECode=8 EsubCode=1)
[14936.476951]  BADV: 060240e70314c090
[14936.480413]  PRID: 0014c010 (Loongson-64bit, Loongson-3A5000)
[14936.486121] Modules linked in: test_klp_livepatch(K) amdgpu
nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet
nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct rfkill nft_cha$
[14936.531409] Process (udev-worker) (pid: 2571,
threadinfo=00000000ee84fdb9, task=00000000e42294a9)
[14936.540231] Stack : 9000000120b4fdd8 0000000000000001
ffff800002d90034 9000000003bde414
[14936.548194]         9000000105d243c0 9000000003b5cea0
0000000000000000 9000000105d243f8
[14936.556156]         0000000000000000 0000000000000000
0000000000000000 9000000120b4fdb0
[14936.564117]         9000000120b4fdd8 9000000120b4fe60
0000000000000001 90000000053e4000
[14936.572079]         0000555587619eb0 9000000104688a00
0000000000000000 9000000101975740
[14936.580040]         0000000000000000 9000000003bd0cbc
0000000000000400 0000000000000000
[14936.588002]         0000000000000400 9000000003b2583c
0000040000000001 0000000000000400
[14936.595963]         0000000000000000 0000000000000000
0000555587619eb0 0000000000000400
[14936.603925]         0000000000000001 9000000104688a00
0000000000000000 0000000000000000
[14936.611887]         0000000000000000 0000400400000000
0000000000000000 c671bfa1c5a3263c
[14936.619848]         ...
[14936.622275] Call Trace:
[14936.622278] [<90000000047277e4>] __memcpy_fast+0x58/0xf0
[14936.629978] WARNING: unknown SP base reg 0 at __memcpy_fast+0x58/0xf0
[14936.629982]
[14936.637852] Code: 28c0c0b2  28c0e0b3  02c100a5 <29c0012c> 29c0212d
29c0412e  29c0612f  29c08130  29c0a131
[14936.647553]
[14936.654440] ---[ end trace 0000000000000000 ]---

I think it is probably because we skip mem* functions here.

Huacai

> diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
> index 2c0a411f23aa..f01172a8f4e9 100644
> --- a/arch/loongarch/mm/tlb.c
> +++ b/arch/loongarch/mm/tlb.c
> @@ -9,8 +9,9 @@
>  #include <linux/hugetlb.h>
>  #include <linux/export.h>
>
> -#include <asm/cpu.h>
>  #include <asm/bootinfo.h>
> +#include <asm/cpu.h>
> +#include <asm/exception.h>
>  #include <asm/mmu_context.h>
>  #include <asm/pgtable.h>
>  #include <asm/tlb.h>
> @@ -266,24 +267,20 @@ static void setup_tlb_handler(int cpu)
>         setup_ptwalker();
>         local_flush_tlb_all();
>
> +       if (cpu_has_ptw) {
> +               exception_table[EXCCODE_TLBI] = handle_tlb_load_ptw;
> +               exception_table[EXCCODE_TLBL] = handle_tlb_load_ptw;
> +               exception_table[EXCCODE_TLBS] = handle_tlb_store_ptw;
> +               exception_table[EXCCODE_TLBM] = handle_tlb_modify_ptw;
> +       }
> +
>         /* The tlb handlers are generated only once */
>         if (cpu == 0) {
>                 memcpy((void *)tlbrentry, handle_tlb_refill, 0x80);
>                 local_flush_icache_range(tlbrentry, tlbrentry + 0x80);
> -               if (!cpu_has_ptw) {
> -                       set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE);
> -                       set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE);
> -                       set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE);
> -                       set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE);
> -               } else {
> -                       set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE);
> -                       set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE);
> -                       set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE);
> -                       set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE);
> -               }
> -               set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
> -               set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
> -               set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
> +
> +               for (int i = EXCCODE_TLBL; i <= EXCCODE_TLBPE; i++)
> +                       set_handler(i * VECSIZE, exception_table[i], VECSIZE);
>         }
>  #ifdef CONFIG_NUMA
>         else {
> diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
> index d5d682f3d29f..a44387b838af 100644
> --- a/arch/loongarch/mm/tlbex.S
> +++ b/arch/loongarch/mm/tlbex.S
> @@ -18,6 +18,7 @@
>
>         .macro tlb_do_page_fault, write
>         SYM_CODE_START(tlb_do_page_fault_\write)
> +       UNWIND_HINT_UNDEFINED
>         SAVE_ALL
>         csrrd           a2, LOONGARCH_CSR_BADV
>         move            a0, sp
> @@ -32,6 +33,7 @@
>         tlb_do_page_fault 1
>
>  SYM_CODE_START(handle_tlb_protect)
> +       UNWIND_HINT_UNDEFINED
>         BACKUP_T0T1
>         SAVE_ALL
>         move            a0, sp
> @@ -44,6 +46,7 @@ SYM_CODE_START(handle_tlb_protect)
>  SYM_CODE_END(handle_tlb_protect)
>
>  SYM_CODE_START(handle_tlb_load)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, EXCEPTION_KS0
>         csrwr           t1, EXCEPTION_KS1
>         csrwr           ra, EXCEPTION_KS2
> @@ -190,6 +193,7 @@ nopage_tlb_load:
>  SYM_CODE_END(handle_tlb_load)
>
>  SYM_CODE_START(handle_tlb_load_ptw)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, LOONGARCH_CSR_KS0
>         csrwr           t1, LOONGARCH_CSR_KS1
>         la_abs          t0, tlb_do_page_fault_0
> @@ -197,6 +201,7 @@ SYM_CODE_START(handle_tlb_load_ptw)
>  SYM_CODE_END(handle_tlb_load_ptw)
>
>  SYM_CODE_START(handle_tlb_store)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, EXCEPTION_KS0
>         csrwr           t1, EXCEPTION_KS1
>         csrwr           ra, EXCEPTION_KS2
> @@ -346,6 +351,7 @@ nopage_tlb_store:
>  SYM_CODE_END(handle_tlb_store)
>
>  SYM_CODE_START(handle_tlb_store_ptw)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, LOONGARCH_CSR_KS0
>         csrwr           t1, LOONGARCH_CSR_KS1
>         la_abs          t0, tlb_do_page_fault_1
> @@ -353,6 +359,7 @@ SYM_CODE_START(handle_tlb_store_ptw)
>  SYM_CODE_END(handle_tlb_store_ptw)
>
>  SYM_CODE_START(handle_tlb_modify)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, EXCEPTION_KS0
>         csrwr           t1, EXCEPTION_KS1
>         csrwr           ra, EXCEPTION_KS2
> @@ -500,6 +507,7 @@ nopage_tlb_modify:
>  SYM_CODE_END(handle_tlb_modify)
>
>  SYM_CODE_START(handle_tlb_modify_ptw)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, LOONGARCH_CSR_KS0
>         csrwr           t1, LOONGARCH_CSR_KS1
>         la_abs          t0, tlb_do_page_fault_1
> @@ -507,6 +515,7 @@ SYM_CODE_START(handle_tlb_modify_ptw)
>  SYM_CODE_END(handle_tlb_modify_ptw)
>
>  SYM_CODE_START(handle_tlb_refill)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, LOONGARCH_CSR_TLBRSAVE
>         csrrd           t0, LOONGARCH_CSR_PGD
>         lddir           t0, t0, 3
> diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
> index c74c9921304f..7a4ad96522ac 100644
> --- a/arch/loongarch/vdso/Makefile
> +++ b/arch/loongarch/vdso/Makefile
> @@ -3,6 +3,7 @@
>
>  KASAN_SANITIZE := n
>  KCOV_INSTRUMENT := n
> +OBJECT_FILES_NON_STANDARD := y
>
>  # Include the generic Makefile to check the built vdso.
>  include $(srctree)/lib/vdso/Makefile
> diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> index bb1339c7057b..39f2d4a05208 100644
> --- a/include/linux/compiler.h
> +++ b/include/linux/compiler.h
> @@ -116,6 +116,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
>   */
>  #define __stringify_label(n) #n
>
> +#define __annotate_reachable(c) ({                                     \
> +       asm volatile(__stringify_label(c) ":\n\t"                       \
> +                       ".pushsection .discard.reachable\n\t"           \
> +                       ".long " __stringify_label(c) "b - .\n\t"       \
> +                       ".popsection\n\t");                             \
> +})
> +#define annotate_reachable() __annotate_reachable(__COUNTER__)
> +
>  #define __annotate_unreachable(c) ({                                   \
>         asm volatile(__stringify_label(c) ":\n\t"                       \
>                      ".pushsection .discard.unreachable\n\t"            \
> @@ -128,6 +136,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
>  #define __annotate_jump_table __section(".rodata..c_jump_table")
>
>  #else /* !CONFIG_OBJTOOL */
> +#define annotate_reachable()
>  #define annotate_unreachable()
>  #define __annotate_jump_table
>  #endif /* CONFIG_OBJTOOL */
> diff --git a/scripts/Makefile b/scripts/Makefile
> index 576cf64be667..e4cca53d2285 100644
> --- a/scripts/Makefile
> +++ b/scripts/Makefile
> @@ -31,9 +31,12 @@ HOSTLDLIBS_sign-file = $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null |
>
>  ifdef CONFIG_UNWINDER_ORC
>  ifeq ($(ARCH),x86_64)
> -ARCH := x86
> +SRCARCH := x86
>  endif
> -HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include
> +ifeq ($(ARCH),loongarch)
> +SRCARCH := loongarch
> +endif
> +HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/$(SRCARCH)/include
>  HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED
>  endif
>
> --
> 2.42.0
>
>
Re: [PATCH v5 8/8] LoongArch: Add ORC stack unwinder support
Posted by Tiezhu Yang 2 years ago

On 12/03/2023 08:42 PM, Huacai Chen wrote:
> On Wed, Nov 29, 2023 at 9:07 PM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>>
>> The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
>> similar in concept to a DWARF unwinder. The difference is that the format
>> of the ORC data is much simpler than DWARF, which in turn allows the ORC
>> unwinder to be much simpler and faster.

...

>> diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
>> index a77bf160bfc4..e3023d9a508c 100644
>> --- a/arch/loongarch/lib/Makefile
>> +++ b/arch/loongarch/lib/Makefile
>> @@ -3,6 +3,8 @@
>>  # Makefile for LoongArch-specific library files.
>>  #
>>
>> +OBJECT_FILES_NON_STANDARD := y
>> +
>>  lib-y  += delay.o memset.o memcpy.o memmove.o \
>>            clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
>>
> I have a draft live-patch patch here:
> https://github.com/chenhuacai/linux/commit/744942cbf456b320f2333638e4bd27d35900284c
>
> But we get such an error:
>
> # selftests: livepatch: test-ftrace.sh
>
>                                       [48/1865]
> [14936.262721] livepatch: failed to register ftrace handler for
> function 'cmdline_proc_show' (-16)
> # TEST: livepatch interaction with ftrace_enabled sysctl ...

...

> I think it is probably because we skip mem* functions here.

I did not test livepatch which is a separate new feature,
let me try it, maybe it needs some time because I am very
busy with some other stuffs.

Thanks,
Tiezhu

Re: [PATCH v5 8/8] LoongArch: Add ORC stack unwinder support
Posted by Huacai Chen 2 years ago
Hi, Tiezhu,

On Wed, Nov 29, 2023 at 9:07 PM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>
> The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
> similar in concept to a DWARF unwinder. The difference is that the format
> of the ORC data is much simpler than DWARF, which in turn allows the ORC
> unwinder to be much simpler and faster.
>
> The ORC data consists of unwind tables which are generated by objtool.
> After analyzing all the code paths of a .o file, it determines information
> about the stack state at each instruction address in the file and outputs
> that information to the .orc_unwind and .orc_unwind_ip sections.
>
> The per-object ORC sections are combined at link time and are sorted and
> post-processed at boot time. The unwinder uses the resulting data to
> correlate instruction addresses with their stack states at run time.
>
> Most of the logic are similar with x86, in order to get ra info before ra
> is saved into stack, add ra_reg and ra_offset into orc_entry. At the same
> time, modify some arch-specific code to silence the objtool warnings.
>
> Co-developed-by: Jinyang He <hejinyang@loongson.cn>
> Signed-off-by: Jinyang He <hejinyang@loongson.cn>
> Co-developed-by: Youling Tang <tangyouling@loongson.cn>
> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
> ---
>  arch/loongarch/Kconfig                    |   2 +
>  arch/loongarch/Kconfig.debug              |  11 +
>  arch/loongarch/Makefile                   |  19 +
>  arch/loongarch/include/asm/Kbuild         |   2 +
>  arch/loongarch/include/asm/bug.h          |   1 +
>  arch/loongarch/include/asm/exception.h    |   2 +
>  arch/loongarch/include/asm/module.h       |   7 +
>  arch/loongarch/include/asm/orc_header.h   |  18 +
>  arch/loongarch/include/asm/orc_lookup.h   |  31 ++
>  arch/loongarch/include/asm/orc_types.h    |  58 +++
>  arch/loongarch/include/asm/stackframe.h   |   3 +
>  arch/loongarch/include/asm/unwind.h       |  19 +-
>  arch/loongarch/include/asm/unwind_hints.h |  28 ++
>  arch/loongarch/kernel/Makefile            |   4 +
>  arch/loongarch/kernel/entry.S             |   6 +-
>  arch/loongarch/kernel/fpu.S               |   7 +
>  arch/loongarch/kernel/genex.S             |   6 +-
>  arch/loongarch/kernel/lbt.S               |   5 +
>  arch/loongarch/kernel/module.c            |  22 +-
>  arch/loongarch/kernel/relocate_kernel.S   |   2 +
>  arch/loongarch/kernel/setup.c             |   2 +
>  arch/loongarch/kernel/stacktrace.c        |   1 +
>  arch/loongarch/kernel/traps.c             |  42 +-
>  arch/loongarch/kernel/unwind_orc.c        | 516 ++++++++++++++++++++++
>  arch/loongarch/kernel/vmlinux.lds.S       |   3 +
>  arch/loongarch/kvm/switch.S               |   7 +-
>  arch/loongarch/lib/Makefile               |   2 +
>  arch/loongarch/mm/tlb.c                   |  27 +-
>  arch/loongarch/mm/tlbex.S                 |   9 +
>  arch/loongarch/vdso/Makefile              |   1 +
>  include/linux/compiler.h                  |   9 +
>  scripts/Makefile                          |   7 +-
>  32 files changed, 839 insertions(+), 40 deletions(-)
>  create mode 100644 arch/loongarch/include/asm/orc_header.h
>  create mode 100644 arch/loongarch/include/asm/orc_lookup.h
>  create mode 100644 arch/loongarch/include/asm/orc_types.h
>  create mode 100644 arch/loongarch/include/asm/unwind_hints.h
>  create mode 100644 arch/loongarch/kernel/unwind_orc.c
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index ee123820a476..eea57378646d 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -132,6 +132,7 @@ config LOONGARCH
>         select HAVE_KVM
>         select HAVE_MOD_ARCH_SPECIFIC
>         select HAVE_NMI
> +       select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS
>         select HAVE_PCI
>         select HAVE_PERF_EVENTS
>         select HAVE_PERF_REGS
> @@ -143,6 +144,7 @@ config LOONGARCH
>         select HAVE_SAMPLE_FTRACE_DIRECT
>         select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
>         select HAVE_SETUP_PER_CPU_AREA if NUMA
> +       select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
>         select HAVE_STACKPROTECTOR
>         select HAVE_SYSCALL_TRACEPOINTS
>         select HAVE_TIF_NOHZ
> diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug
> index 8d36aab53008..98d60630c3d4 100644
> --- a/arch/loongarch/Kconfig.debug
> +++ b/arch/loongarch/Kconfig.debug
> @@ -26,4 +26,15 @@ config UNWINDER_PROLOGUE
>           Some of the addresses it reports may be incorrect (but better than the
>           Guess unwinder).
>
> +config UNWINDER_ORC
> +       bool "ORC unwinder"
> +       select OBJTOOL
> +       help
> +         This option enables the ORC (Oops Rewind Capability) unwinder for
> +         unwinding kernel stack traces.  It uses a custom data format which is
> +         a simplified version of the DWARF Call Frame Information standard.
> +
> +         Enabling this option will increase the kernel's runtime memory usage
> +         by roughly 2-4MB, depending on your kernel config.
> +
>  endchoice
> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
> index 204b94b2e6aa..1f0e41f8ab61 100644
> --- a/arch/loongarch/Makefile
> +++ b/arch/loongarch/Makefile
> @@ -25,6 +25,18 @@ endif
>  32bit-emul             = elf32loongarch
>  64bit-emul             = elf64loongarch
>
> +ifdef CONFIG_UNWINDER_ORC
> +orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h
> +orc_hash_sh := $(srctree)/scripts/orc_hash.sh
> +targets += $(orc_hash_h)
> +quiet_cmd_orc_hash = GEN     $@
> +      cmd_orc_hash = mkdir -p $(dir $@); \
> +                    $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@
> +$(orc_hash_h): $(srctree)/arch/loongarch/include/asm/orc_types.h $(orc_hash_sh) FORCE
> +       $(call if_changed,orc_hash)
> +archprepare: $(orc_hash_h)
> +endif
> +
>  ifdef CONFIG_DYNAMIC_FTRACE
>  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
>  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
> @@ -81,6 +93,13 @@ KBUILD_AFLAGS_MODULE         += -Wa,-mla-global-with-abs
>  KBUILD_CFLAGS_MODULE           += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
>  endif
>
> +KBUILD_AFLAGS                  += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
> +KBUILD_CFLAGS                  += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
> +
> +ifdef CONFIG_OBJTOOL
> +KBUILD_CFLAGS                  += -fno-optimize-sibling-calls -fno-jump-tables -falign-functions=4 -falign-labels=4
> +endif
> +
>  ifeq ($(CONFIG_RELOCATABLE),y)
>  KBUILD_CFLAGS_KERNEL           += -fPIE
>  LDFLAGS_vmlinux                        += -static -pie --no-dynamic-linker -z notext
> diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
> index 93783fa24f6e..a97c0edbb866 100644
> --- a/arch/loongarch/include/asm/Kbuild
> +++ b/arch/loongarch/include/asm/Kbuild
> @@ -1,4 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0
> +generated-y += orc_hash.h
> +
>  generic-y += dma-contiguous.h
>  generic-y += mcs_spinlock.h
>  generic-y += parport.h
> diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h
> index d4ca3ba25418..08388876ade4 100644
> --- a/arch/loongarch/include/asm/bug.h
> +++ b/arch/loongarch/include/asm/bug.h
> @@ -44,6 +44,7 @@
>  do {                                                           \
>         instrumentation_begin();                                \
>         __BUG_FLAGS(BUGFLAG_WARNING|(flags));                   \
> +       annotate_reachable();                                   \
>         instrumentation_end();                                  \
>  } while (0)
>
> diff --git a/arch/loongarch/include/asm/exception.h b/arch/loongarch/include/asm/exception.h
> index af74a3fdcad1..c6d20736fd92 100644
> --- a/arch/loongarch/include/asm/exception.h
> +++ b/arch/loongarch/include/asm/exception.h
> @@ -6,6 +6,8 @@
>  #include <asm/ptrace.h>
>  #include <linux/kprobes.h>
>
> +extern void *exception_table[];
> +
>  void show_registers(struct pt_regs *regs);
>
>  asmlinkage void cache_parity_error(void);
> diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
> index 2ecd82bb64e1..f33f3fd32ecc 100644
> --- a/arch/loongarch/include/asm/module.h
> +++ b/arch/loongarch/include/asm/module.h
> @@ -6,6 +6,7 @@
>  #define _ASM_MODULE_H
>
>  #include <asm/inst.h>
> +#include <asm/orc_types.h>
>  #include <asm-generic/module.h>
>
>  #define RELA_STACK_DEPTH 16
> @@ -21,6 +22,12 @@ struct mod_arch_specific {
>         struct mod_section plt;
>         struct mod_section plt_idx;
>
> +#ifdef CONFIG_UNWINDER_ORC
> +       unsigned int num_orcs;
> +       int *orc_unwind_ip;
> +       struct orc_entry *orc_unwind;
> +#endif
> +
>         /* For CONFIG_DYNAMIC_FTRACE */
>         struct plt_entry *ftrace_trampolines;
>  };
> diff --git a/arch/loongarch/include/asm/orc_header.h b/arch/loongarch/include/asm/orc_header.h
> new file mode 100644
> index 000000000000..f9d509c3fd70
> --- /dev/null
> +++ b/arch/loongarch/include/asm/orc_header.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +#ifndef _ORC_HEADER_H
> +#define _ORC_HEADER_H
> +
> +#include <linux/types.h>
> +#include <linux/compiler.h>
> +#include <asm/orc_hash.h>
> +
> +/*
> + * The header is currently a 20-byte hash of the ORC entry definition; see
> + * scripts/orc_hash.sh.
> + */
> +#define ORC_HEADER                                     \
> +       __used __section(".orc_header") __aligned(4)    \
> +       static const u8 orc_header[] = { ORC_HASH }
> +
> +#endif /* _ORC_HEADER_H */
> diff --git a/arch/loongarch/include/asm/orc_lookup.h b/arch/loongarch/include/asm/orc_lookup.h
> new file mode 100644
> index 000000000000..b02e6357def4
> --- /dev/null
> +++ b/arch/loongarch/include/asm/orc_lookup.h
> @@ -0,0 +1,31 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#ifndef _ORC_LOOKUP_H
> +#define _ORC_LOOKUP_H
> +
> +/*
> + * This is a lookup table for speeding up access to the .orc_unwind table.
> + * Given an input address offset, the corresponding lookup table entry
> + * specifies a subset of the .orc_unwind table to search.
> + *
> + * Each block represents the end of the previous range and the start of the
> + * next range.  An extra block is added to give the last range an end.
> + *
> + * The block size should be a power of 2 to avoid a costly 'div' instruction.
> + *
> + * A block size of 256 was chosen because it roughly doubles unwinder
> + * performance while only adding ~5% to the ORC data footprint.
> + */
> +#define LOOKUP_BLOCK_ORDER     8
> +#define LOOKUP_BLOCK_SIZE      (1 << LOOKUP_BLOCK_ORDER)
> +
> +#ifndef LINKER_SCRIPT
> +
> +extern unsigned int orc_lookup[];
> +extern unsigned int orc_lookup_end[];
> +
> +#define LOOKUP_START_IP                (unsigned long)_stext
> +#define LOOKUP_STOP_IP         (unsigned long)_etext
> +
> +#endif /* LINKER_SCRIPT */
> +
> +#endif /* _ORC_LOOKUP_H */
> diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h
> new file mode 100644
> index 000000000000..caf1f71a1057
> --- /dev/null
> +++ b/arch/loongarch/include/asm/orc_types.h
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#ifndef _ORC_TYPES_H
> +#define _ORC_TYPES_H
> +
> +#include <linux/types.h>
> +
> +/*
> + * The ORC_REG_* registers are base registers which are used to find other
> + * registers on the stack.
> + *
> + * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
> + * address of the previous frame: the caller's SP before it called the current
> + * function.
> + *
> + * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
> + * the current frame.
> + *
> + * The most commonly used base registers are SP and FP -- which the previous SP
> + * is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is
> + * usually based on.
> + *
> + * The rest of the base registers are needed for special cases like entry code
> + * and GCC realigned stacks.
> + */
> +#define ORC_REG_UNDEFINED              0
> +#define ORC_REG_PREV_SP                        1
> +#define ORC_REG_SP                     2
> +#define ORC_REG_FP                     3
> +#define ORC_REG_MAX                    4
> +
> +#define ORC_TYPE_UNDEFINED             0
> +#define ORC_TYPE_END_OF_STACK          1
> +#define ORC_TYPE_CALL                  2
> +#define ORC_TYPE_REGS                  3
> +#define ORC_TYPE_REGS_PARTIAL          4
> +
> +#ifndef __ASSEMBLY__
> +/*
> + * This struct is more or less a vastly simplified version of the DWARF Call
> + * Frame Information standard.  It contains only the necessary parts of DWARF
> + * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
> + * unwinder how to find the previous SP and FP (and sometimes entry regs) on
> + * the stack for a given code address.  Each instance of the struct corresponds
> + * to one or more code locations.
> + */
> +struct orc_entry {
> +       s16             sp_offset;
> +       s16             fp_offset;
> +       s16             ra_offset;
> +       unsigned int    sp_reg:4;
> +       unsigned int    fp_reg:4;
> +       unsigned int    ra_reg:4;
> +       unsigned int    type:3;
> +       unsigned int    signal:1;
> +};
> +#endif /* __ASSEMBLY__ */
> +
> +#endif /* _ORC_TYPES_H */
> diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
> index 4fb1e6408b98..45b507a7b06f 100644
> --- a/arch/loongarch/include/asm/stackframe.h
> +++ b/arch/loongarch/include/asm/stackframe.h
> @@ -13,6 +13,7 @@
>  #include <asm/asm-offsets.h>
>  #include <asm/loongarch.h>
>  #include <asm/thread_info.h>
> +#include <asm/unwind_hints.h>
>
>  /* Make the addition of cfi info a little easier. */
>         .macro cfi_rel_offset reg offset=0 docfi=0
> @@ -162,6 +163,7 @@
>         li.w    t0, CSR_CRMD_WE
>         csrxchg t0, t0, LOONGARCH_CSR_CRMD
>  #endif
> +       UNWIND_HINT_REGS
>         .endm
>
>         .macro  SAVE_ALL docfi=0
> @@ -219,6 +221,7 @@
>
>         .macro  RESTORE_SP_AND_RET docfi=0
>         cfi_ld  sp, PT_R3, \docfi
> +       UNWIND_HINT_FUNC
>         ertn
>         .endm
>
> diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
> index b9dce87afd2e..c7f52d406cce 100644
> --- a/arch/loongarch/include/asm/unwind.h
> +++ b/arch/loongarch/include/asm/unwind.h
> @@ -16,6 +16,7 @@
>  enum unwinder_type {
>         UNWINDER_GUESS,
>         UNWINDER_PROLOGUE,
> +       UNWINDER_ORC,
>  };
>
>  struct unwind_state {
> @@ -24,7 +25,7 @@ struct unwind_state {
>         struct task_struct *task;
>         bool first, error, reset;
>         int graph_idx;
> -       unsigned long sp, pc, ra;
> +       unsigned long sp, fp, pc, ra;
>  };
>
>  bool default_next_frame(struct unwind_state *state);
> @@ -34,6 +35,14 @@ void unwind_start(struct unwind_state *state,
>  bool unwind_next_frame(struct unwind_state *state);
>  unsigned long unwind_get_return_address(struct unwind_state *state);
>
> +#ifdef CONFIG_UNWINDER_ORC
> +void unwind_init(void);
> +void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size);
> +#else
> +static inline void unwind_init(void) {}
> +static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {}
> +#endif
> +
>  static inline bool unwind_done(struct unwind_state *state)
>  {
>         return state->stack_info.type == STACK_TYPE_UNKNOWN;
> @@ -61,14 +70,17 @@ static __always_inline void __unwind_start(struct unwind_state *state,
>                 state->sp = regs->regs[3];
>                 state->pc = regs->csr_era;
>                 state->ra = regs->regs[1];
> +               state->fp = regs->regs[22];
>         } else if (task && task != current) {
>                 state->sp = thread_saved_fp(task);
>                 state->pc = thread_saved_ra(task);
>                 state->ra = 0;
> +               state->fp = 0;
>         } else {
>                 state->sp = (unsigned long)__builtin_frame_address(0);
>                 state->pc = (unsigned long)__builtin_return_address(0);
>                 state->ra = 0;
> +               state->fp = 0;
>         }
>         state->task = task;
>         get_stack_info(state->sp, state->task, &state->stack_info);
> @@ -77,6 +89,9 @@ static __always_inline void __unwind_start(struct unwind_state *state,
>
>  static __always_inline unsigned long __unwind_get_return_address(struct unwind_state *state)
>  {
> -       return unwind_done(state) ? 0 : state->pc;
> +       if (unwind_done(state))
> +               return 0;
> +
> +       return __kernel_text_address(state->pc) ? state->pc : 0;
>  }
>  #endif /* _ASM_UNWIND_H */
> diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h
> new file mode 100644
> index 000000000000..82443fed3167
> --- /dev/null
> +++ b/arch/loongarch/include/asm/unwind_hints.h
> @@ -0,0 +1,28 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_LOONGARCH_UNWIND_HINTS_H
> +#define _ASM_LOONGARCH_UNWIND_HINTS_H
> +
> +#include <linux/objtool.h>
> +#include <asm/orc_types.h>
> +
> +#ifdef __ASSEMBLY__
> +
> +.macro UNWIND_HINT_UNDEFINED
> +       UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED
> +.endm
> +
> +.macro UNWIND_HINT_EMPTY
> +       UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL
> +.endm
> +
> +.macro UNWIND_HINT_REGS
> +       UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_REGS
> +.endm
> +
> +.macro UNWIND_HINT_FUNC
> +       UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL
> +.endm
> +
> +#endif /* __ASSEMBLY__ */
> +
> +#endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index 4fcc168f0732..ac47e11c227e 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -3,6 +3,8 @@
>  # Makefile for the Linux/LoongArch kernel.
>  #
>
> +OBJECT_FILES_NON_STANDARD_head.o := y
> +
>  extra-y                := vmlinux.lds
>
>  obj-y          += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
> @@ -21,6 +23,7 @@ obj-$(CONFIG_ARCH_STRICT_ALIGN)       += unaligned.o
>
>  CFLAGS_module.o                += $(call cc-option,-Wno-override-init,)
>  CFLAGS_syscall.o       += $(call cc-option,-Wno-override-init,)
> +CFLAGS_traps.o         += $(call cc-option,-Wno-override-init,)
>  CFLAGS_perf_event.o    += $(call cc-option,-Wno-override-init,)
>
>  ifdef CONFIG_FUNCTION_TRACER
> @@ -62,6 +65,7 @@ obj-$(CONFIG_CRASH_DUMP)      += crash_dump.o
>
>  obj-$(CONFIG_UNWINDER_GUESS)   += unwind_guess.o
>  obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
> +obj-$(CONFIG_UNWINDER_ORC)     += unwind_orc.o
>
>  obj-$(CONFIG_PERF_EVENTS)      += perf_event.o perf_regs.o
>  obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
> diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
> index 1ec8e4c4cc2b..2f5c74d26d5f 100644
> --- a/arch/loongarch/kernel/entry.S
> +++ b/arch/loongarch/kernel/entry.S
> @@ -14,11 +14,12 @@
>  #include <asm/regdef.h>
>  #include <asm/stackframe.h>
>  #include <asm/thread_info.h>
> +#include <asm/unwind_hints.h>
>
>         .text
>         .cfi_sections   .debug_frame
> -       .align  5
>  SYM_CODE_START(handle_syscall)
> +       UNWIND_HINT_UNDEFINED
>         csrrd           t0, PERCPU_BASE_KS
>         la.pcrel        t1, kernelsp
>         add.d           t1, t1, t0
> @@ -57,6 +58,7 @@ SYM_CODE_START(handle_syscall)
>         cfi_st          fp, PT_R22
>
>         SAVE_STATIC
> +       UNWIND_HINT_REGS
>
>  #ifdef CONFIG_KGDB
>         li.w            t1, CSR_CRMD_WE
> @@ -75,6 +77,7 @@ SYM_CODE_END(handle_syscall)
>  _ASM_NOKPROBE(handle_syscall)
>
>  SYM_CODE_START(ret_from_fork)
> +       UNWIND_HINT_REGS
>         bl              schedule_tail           # a0 = struct task_struct *prev
>         move            a0, sp
>         bl              syscall_exit_to_user_mode
> @@ -84,6 +87,7 @@ SYM_CODE_START(ret_from_fork)
>  SYM_CODE_END(ret_from_fork)
>
>  SYM_CODE_START(ret_from_kernel_thread)
> +       UNWIND_HINT_REGS
>         bl              schedule_tail           # a0 = struct task_struct *prev
>         move            a0, s1
>         jirl            ra, s0, 0
> diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
> index d53ab10f4644..487933899c5d 100644
> --- a/arch/loongarch/kernel/fpu.S
> +++ b/arch/loongarch/kernel/fpu.S
> @@ -15,6 +15,7 @@
>  #include <asm/fpregdef.h>
>  #include <asm/loongarch.h>
>  #include <asm/regdef.h>
> +#include <asm/unwind_hints.h>
>
>  #define FPU_REG_WIDTH          8
>  #define LSX_REG_WIDTH          16
> @@ -524,3 +525,9 @@ SYM_FUNC_END(_restore_lasx_context)
>  .L_fpu_fault:
>         li.w    a0, -EFAULT                             # failure
>         jr      ra
> +
> +#ifdef CONFIG_CPU_HAS_LBT
> +STACK_FRAME_NON_STANDARD _restore_fp
> +STACK_FRAME_NON_STANDARD _restore_lsx
> +STACK_FRAME_NON_STANDARD _restore_lasx
> +#endif
> diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
> index 2bb3aa2dcfcb..6b7ec9f1134d 100644
> --- a/arch/loongarch/kernel/genex.S
> +++ b/arch/loongarch/kernel/genex.S
> @@ -16,7 +16,6 @@
>  #include <asm/stackframe.h>
>  #include <asm/thread_info.h>
>
> -       .align  5
>  SYM_FUNC_START(__arch_cpu_idle)
>         /* start of rollback region */
>         LONG_L  t0, tp, TI_FLAGS
> @@ -32,6 +31,7 @@ SYM_FUNC_START(__arch_cpu_idle)
>  SYM_FUNC_END(__arch_cpu_idle)
>
>  SYM_CODE_START(handle_vint)
> +       UNWIND_HINT_UNDEFINED
>         BACKUP_T0T1
>         SAVE_ALL
>         la_abs  t1, __arch_cpu_idle
> @@ -49,6 +49,7 @@ SYM_CODE_START(handle_vint)
>  SYM_CODE_END(handle_vint)
>
>  SYM_CODE_START(except_vec_cex)
> +       UNWIND_HINT_UNDEFINED
>         b       cache_parity_error
>  SYM_CODE_END(except_vec_cex)
>
> @@ -65,8 +66,8 @@ SYM_CODE_END(except_vec_cex)
>         .endm
>
>         .macro  BUILD_HANDLER exception handler prep
> -       .align  5
>         SYM_CODE_START(handle_\exception)
> +       UNWIND_HINT_UNDEFINED
>         666:
>         BACKUP_T0T1
>         SAVE_ALL
> @@ -94,6 +95,7 @@ SYM_CODE_END(except_vec_cex)
>         BUILD_HANDLER reserved reserved none    /* others */
>
>  SYM_CODE_START(handle_sys)
> +       UNWIND_HINT_UNDEFINED
>         la_abs  t0, handle_syscall
>         jr      t0
>  SYM_CODE_END(handle_sys)
> diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
> index 9c75120a26d8..4d6914f84c7e 100644
> --- a/arch/loongarch/kernel/lbt.S
> +++ b/arch/loongarch/kernel/lbt.S
> @@ -11,6 +11,7 @@
>  #include <asm/asm-offsets.h>
>  #include <asm/errno.h>
>  #include <asm/regdef.h>
> +#include <asm/unwind_hints.h>
>
>  #define SCR_REG_WIDTH 8
>
> @@ -153,3 +154,7 @@ SYM_FUNC_END(_restore_ftop_context)
>  .L_lbt_fault:
>         li.w            a0, -EFAULT             # failure
>         jr              ra
> +
> +#ifdef CONFIG_CPU_HAS_LBT
> +STACK_FRAME_NON_STANDARD _restore_ftop_context
> +#endif
This file is only compiled if CONFIG_CPU_HAS_LBT is set, so #ifdef can
be removed.

Huacai

> diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
> index b13b2858fe39..c7d0338d12c1 100644
> --- a/arch/loongarch/kernel/module.c
> +++ b/arch/loongarch/kernel/module.c
> @@ -20,6 +20,7 @@
>  #include <linux/kernel.h>
>  #include <asm/alternative.h>
>  #include <asm/inst.h>
> +#include <asm/unwind.h>
>
>  static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top)
>  {
> @@ -515,15 +516,28 @@ static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
>  int module_finalize(const Elf_Ehdr *hdr,
>                     const Elf_Shdr *sechdrs, struct module *mod)
>  {
> -       const Elf_Shdr *s, *se;
>         const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
> +       const Elf_Shdr *s, *alt = NULL, *orc = NULL, *orc_ip = NULL, *ftrace = NULL;
>
> -       for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
> +       for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
>                 if (!strcmp(".altinstructions", secstrs + s->sh_name))
> -                       apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size);
> +                       alt = s;
> +               if (!strcmp(".orc_unwind", secstrs + s->sh_name))
> +                       orc = s;
> +               if (!strcmp(".orc_unwind_ip", secstrs + s->sh_name))
> +                       orc_ip = s;
>                 if (!strcmp(".ftrace_trampoline", secstrs + s->sh_name))
> -                       module_init_ftrace_plt(hdr, s, mod);
> +                       ftrace = s;
>         }
>
> +       if (alt)
> +               apply_alternatives((void *)alt->sh_addr, (void *)alt->sh_addr + alt->sh_size);
> +
> +       if (orc && orc_ip)
> +               unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, (void *)orc->sh_addr, orc->sh_size);
> +
> +       if (ftrace)
> +               module_init_ftrace_plt(hdr, ftrace, mod);
> +
>         return 0;
>  }
> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
> index f49f6b053763..bcc191d278c1 100644
> --- a/arch/loongarch/kernel/relocate_kernel.S
> +++ b/arch/loongarch/kernel/relocate_kernel.S
> @@ -15,6 +15,7 @@
>  #include <asm/addrspace.h>
>
>  SYM_CODE_START(relocate_new_kernel)
> +       UNWIND_HINT_UNDEFINED
>         /*
>          * a0: EFI boot flag for the new kernel
>          * a1: Command line pointer for the new kernel
> @@ -90,6 +91,7 @@ SYM_CODE_END(relocate_new_kernel)
>   * then start at the entry point from LOONGARCH_IOCSR_MBUF0.
>   */
>  SYM_CODE_START(kexec_smp_wait)
> +       UNWIND_HINT_UNDEFINED
>  1:     li.w            t0, 0x100                       /* wait for init loop */
>  2:     addi.w          t0, t0, -1                      /* limit mailbox access */
>         bnez            t0, 2b
> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> index d183a745fb85..ec4459c61db6 100644
> --- a/arch/loongarch/kernel/setup.c
> +++ b/arch/loongarch/kernel/setup.c
> @@ -47,6 +47,7 @@
>  #include <asm/sections.h>
>  #include <asm/setup.h>
>  #include <asm/time.h>
> +#include <asm/unwind.h>
>
>  #define SMBIOS_BIOSSIZE_OFFSET         0x09
>  #define SMBIOS_BIOSEXTERN_OFFSET       0x13
> @@ -605,6 +606,7 @@ static void __init prefill_possible_map(void)
>  void __init setup_arch(char **cmdline_p)
>  {
>         cpu_probe();
> +       unwind_init();
>
>         init_environ();
>         efi_init();
> diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
> index 92270f14db94..9848d427cbfa 100644
> --- a/arch/loongarch/kernel/stacktrace.c
> +++ b/arch/loongarch/kernel/stacktrace.c
> @@ -29,6 +29,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
>                         regs->csr_era = thread_saved_ra(task);
>                 }
>                 regs->regs[1] = 0;
> +               regs->regs[22] = 0;
>         }
>
>         for (unwind_start(&state, task, regs);
> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> index aebfc3733a76..f9f4eb00c92e 100644
> --- a/arch/loongarch/kernel/traps.c
> +++ b/arch/loongarch/kernel/traps.c
> @@ -53,6 +53,32 @@
>
>  #include "access-helper.h"
>
> +void *exception_table[EXCCODE_INT_START] = {
> +       [0 ... EXCCODE_INT_START - 1] = handle_reserved,
> +
> +       [EXCCODE_TLBI]          = handle_tlb_load,
> +       [EXCCODE_TLBL]          = handle_tlb_load,
> +       [EXCCODE_TLBS]          = handle_tlb_store,
> +       [EXCCODE_TLBM]          = handle_tlb_modify,
> +       [EXCCODE_TLBNR]         = handle_tlb_protect,
> +       [EXCCODE_TLBNX]         = handle_tlb_protect,
> +       [EXCCODE_TLBPE]         = handle_tlb_protect,
> +       [EXCCODE_ADE]           = handle_ade,
> +       [EXCCODE_ALE]           = handle_ale,
> +       [EXCCODE_BCE]           = handle_bce,
> +       [EXCCODE_SYS]           = handle_sys,
> +       [EXCCODE_BP]            = handle_bp,
> +       [EXCCODE_INE]           = handle_ri,
> +       [EXCCODE_IPE]           = handle_ri,
> +       [EXCCODE_FPDIS]         = handle_fpu,
> +       [EXCCODE_LSXDIS]        = handle_lsx,
> +       [EXCCODE_LASXDIS]       = handle_lasx,
> +       [EXCCODE_FPE]           = handle_fpe,
> +       [EXCCODE_WATCH]         = handle_watch,
> +       [EXCCODE_BTDIS]         = handle_lbt,
> +};
> +EXPORT_SYMBOL_GPL(exception_table);
> +
>  static void show_backtrace(struct task_struct *task, const struct pt_regs *regs,
>                            const char *loglvl, bool user)
>  {
> @@ -1150,19 +1176,9 @@ void __init trap_init(void)
>         for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++)
>                 set_handler(i * VECSIZE, handle_vint, VECSIZE);
>
> -       set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE);
> -       set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE);
> -       set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE);
> -       set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE);
> -       set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE);
> -       set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE);
> -       set_handler(EXCCODE_IPE * VECSIZE, handle_ri, VECSIZE);
> -       set_handler(EXCCODE_FPDIS * VECSIZE, handle_fpu, VECSIZE);
> -       set_handler(EXCCODE_LSXDIS * VECSIZE, handle_lsx, VECSIZE);
> -       set_handler(EXCCODE_LASXDIS * VECSIZE, handle_lasx, VECSIZE);
> -       set_handler(EXCCODE_FPE * VECSIZE, handle_fpe, VECSIZE);
> -       set_handler(EXCCODE_BTDIS * VECSIZE, handle_lbt, VECSIZE);
> -       set_handler(EXCCODE_WATCH * VECSIZE, handle_watch, VECSIZE);
> +       /* Set exception vector handler */
> +       for (i = EXCCODE_ADE; i <= EXCCODE_BTDIS; i++)
> +               set_handler(i * VECSIZE, exception_table[i], VECSIZE);
>
>         cache_error_setup();
>
> diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c
> new file mode 100644
> index 000000000000..15f18d1e0dad
> --- /dev/null
> +++ b/arch/loongarch/kernel/unwind_orc.c
> @@ -0,0 +1,516 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#include <linux/objtool.h>
> +#include <linux/module.h>
> +#include <linux/sort.h>
> +#include <asm/exception.h>
> +#include <asm/orc_header.h>
> +#include <asm/orc_lookup.h>
> +#include <asm/orc_types.h>
> +#include <asm/ptrace.h>
> +#include <asm/setup.h>
> +#include <asm/stacktrace.h>
> +#include <asm/tlb.h>
> +#include <asm/unwind.h>
> +
> +ORC_HEADER;
> +
> +#define orc_warn(fmt, ...) \
> +       printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
> +
> +extern int __start_orc_unwind_ip[];
> +extern int __stop_orc_unwind_ip[];
> +extern struct orc_entry __start_orc_unwind[];
> +extern struct orc_entry __stop_orc_unwind[];
> +
> +static bool orc_init __ro_after_init;
> +static unsigned int lookup_num_blocks __ro_after_init;
> +
> +/* Fake frame pointer entry -- used as a fallback for generated code */
> +static struct orc_entry orc_fp_entry = {
> +       .sp_reg         = ORC_REG_FP,
> +       .sp_offset      = 16,
> +       .fp_reg         = ORC_REG_PREV_SP,
> +       .fp_offset      = -16,
> +       .ra_reg         = ORC_REG_PREV_SP,
> +       .ra_offset      = -8,
> +       .type           = ORC_TYPE_CALL
> +};
> +
> +/*
> + * If we crash with IP==0, the last successfully executed instruction
> + * was probably an indirect function call with a NULL function pointer,
> + * and we don't have unwind information for NULL.
> + * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function
> + * pointer into its parent and then continue normally from there.
> + */
> +static struct orc_entry orc_null_entry = {
> +       .sp_reg         = ORC_REG_SP,
> +       .sp_offset      = sizeof(long),
> +       .fp_reg         = ORC_REG_UNDEFINED,
> +       .type           = ORC_TYPE_CALL
> +};
> +
> +static inline unsigned long orc_ip(const int *ip)
> +{
> +       return (unsigned long)ip + *ip;
> +}
> +
> +static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
> +                                   unsigned int num_entries, unsigned long ip)
> +{
> +       int *first = ip_table;
> +       int *mid = first, *found = first;
> +       int *last = ip_table + num_entries - 1;
> +
> +       if (!num_entries)
> +               return NULL;
> +
> +       /*
> +        * Do a binary range search to find the rightmost duplicate of a given
> +        * starting address.  Some entries are section terminators which are
> +        * "weak" entries for ensuring there are no gaps.  They should be
> +        * ignored when they conflict with a real entry.
> +        */
> +       while (first <= last) {
> +               mid = first + ((last - first) / 2);
> +
> +               if (orc_ip(mid) <= ip) {
> +                       found = mid;
> +                       first = mid + 1;
> +               } else
> +                       last = mid - 1;
> +       }
> +
> +       return u_table + (found - ip_table);
> +}
> +
> +#ifdef CONFIG_MODULES
> +static struct orc_entry *orc_module_find(unsigned long ip)
> +{
> +       struct module *mod;
> +
> +       mod = __module_address(ip);
> +       if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip)
> +               return NULL;
> +
> +       return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind, mod->arch.num_orcs, ip);
> +}
> +#else
> +static struct orc_entry *orc_module_find(unsigned long ip)
> +{
> +       return NULL;
> +}
> +#endif
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +static struct orc_entry *orc_find(unsigned long ip);
> +
> +/*
> + * Ftrace dynamic trampolines do not have orc entries of their own.
> + * But they are copies of the ftrace entries that are static and
> + * defined in ftrace_*.S, which do have orc entries.
> + *
> + * If the unwinder comes across a ftrace trampoline, then find the
> + * ftrace function that was used to create it, and use that ftrace
> + * function's orc entry, as the placement of the return code in
> + * the stack will be identical.
> + */
> +static struct orc_entry *orc_ftrace_find(unsigned long ip)
> +{
> +       struct ftrace_ops *ops;
> +       unsigned long tramp_addr, offset;
> +
> +       ops = ftrace_ops_trampoline(ip);
> +       if (!ops)
> +               return NULL;
> +
> +       /* Set tramp_addr to the start of the code copied by the trampoline */
> +       if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
> +               tramp_addr = (unsigned long)ftrace_regs_caller;
> +       else
> +               tramp_addr = (unsigned long)ftrace_caller;
> +
> +       /* Now place tramp_addr to the location within the trampoline ip is at */
> +       offset = ip - ops->trampoline;
> +       tramp_addr += offset;
> +
> +       /* Prevent unlikely recursion */
> +       if (ip == tramp_addr)
> +               return NULL;
> +
> +       return orc_find(tramp_addr);
> +}
> +#else
> +static struct orc_entry *orc_ftrace_find(unsigned long ip)
> +{
> +       return NULL;
> +}
> +#endif
> +
> +static struct orc_entry *orc_find(unsigned long ip)
> +{
> +       static struct orc_entry *orc;
> +
> +       if (ip == 0)
> +               return &orc_null_entry;
> +
> +       /* For non-init vmlinux addresses, use the fast lookup table: */
> +       if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
> +               unsigned int idx, start, stop;
> +
> +               idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
> +
> +               if (unlikely((idx >= lookup_num_blocks-1))) {
> +                       orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
> +                                idx, lookup_num_blocks, (void *)ip);
> +                       return NULL;
> +               }
> +
> +               start = orc_lookup[idx];
> +               stop = orc_lookup[idx + 1] + 1;
> +
> +               if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
> +                            (__start_orc_unwind + stop > __stop_orc_unwind))) {
> +                       orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
> +                                idx, lookup_num_blocks, start, stop, (void *)ip);
> +                       return NULL;
> +               }
> +
> +               return __orc_find(__start_orc_unwind_ip + start,
> +                                 __start_orc_unwind + start, stop - start, ip);
> +       }
> +
> +       /* vmlinux .init slow lookup: */
> +       if (is_kernel_inittext(ip))
> +               return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
> +                                 __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
> +
> +       /* Module lookup: */
> +       orc = orc_module_find(ip);
> +       if (orc)
> +               return orc;
> +
> +       return orc_ftrace_find(ip);
> +}
> +
> +#ifdef CONFIG_MODULES
> +
> +static DEFINE_MUTEX(sort_mutex);
> +static int *cur_orc_ip_table = __start_orc_unwind_ip;
> +static struct orc_entry *cur_orc_table = __start_orc_unwind;
> +
> +static void orc_sort_swap(void *_a, void *_b, int size)
> +{
> +       int delta = _b - _a;
> +       int *a = _a, *b = _b, tmp;
> +       struct orc_entry *orc_a, *orc_b;
> +
> +       /* Swap the .orc_unwind_ip entries: */
> +       tmp = *a;
> +       *a = *b + delta;
> +       *b = tmp - delta;
> +
> +       /* Swap the corresponding .orc_unwind entries: */
> +       orc_a = cur_orc_table + (a - cur_orc_ip_table);
> +       orc_b = cur_orc_table + (b - cur_orc_ip_table);
> +       swap(*orc_a, *orc_b);
> +}
> +
> +static int orc_sort_cmp(const void *_a, const void *_b)
> +{
> +       const int *a = _a, *b = _b;
> +       unsigned long a_val = orc_ip(a);
> +       unsigned long b_val = orc_ip(b);
> +       struct orc_entry *orc_a;
> +
> +       if (a_val > b_val)
> +               return 1;
> +       if (a_val < b_val)
> +               return -1;
> +
> +       /*
> +        * The "weak" section terminator entries need to always be first
> +        * to ensure the lookup code skips them in favor of real entries.
> +        * These terminator entries exist to handle any gaps created by
> +        * whitelisted .o files which didn't get objtool generation.
> +        */
> +       orc_a = cur_orc_table + (a - cur_orc_ip_table);
> +
> +       return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1;
> +}
> +
> +void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
> +                       void *_orc, size_t orc_size)
> +{
> +       int *orc_ip = _orc_ip;
> +       struct orc_entry *orc = _orc;
> +       unsigned int num_entries = orc_ip_size / sizeof(int);
> +
> +       WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 ||
> +                    orc_size % sizeof(*orc) != 0 ||
> +                    num_entries != orc_size / sizeof(*orc));
> +
> +       /*
> +        * The 'cur_orc_*' globals allow the orc_sort_swap() callback to
> +        * associate an .orc_unwind_ip table entry with its corresponding
> +        * .orc_unwind entry so they can both be swapped.
> +        */
> +       mutex_lock(&sort_mutex);
> +       cur_orc_ip_table = orc_ip;
> +       cur_orc_table = orc;
> +       sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap);
> +       mutex_unlock(&sort_mutex);
> +
> +       mod->arch.orc_unwind_ip = orc_ip;
> +       mod->arch.orc_unwind = orc;
> +       mod->arch.num_orcs = num_entries;
> +}
> +#endif
> +
> +void __init unwind_init(void)
> +{
> +       int i;
> +       size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
> +       size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
> +       size_t num_entries = orc_ip_size / sizeof(int);
> +       struct orc_entry *orc;
> +
> +       if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
> +           orc_size % sizeof(struct orc_entry) != 0 ||
> +           num_entries != orc_size / sizeof(struct orc_entry)) {
> +               orc_warn("WARNING: Bad or missing .orc_unwind table.  Disabling unwinder.\n");
> +               return;
> +       }
> +
> +       /*
> +        * Note, the orc_unwind and orc_unwind_ip tables were already
> +        * sorted at build time via the 'sorttable' tool.
> +        * It's ready for binary search straight away, no need to sort it.
> +        */
> +
> +       /* Initialize the fast lookup table: */
> +       lookup_num_blocks = orc_lookup_end - orc_lookup;
> +       for (i = 0; i < lookup_num_blocks-1; i++) {
> +               orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
> +                                num_entries, LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
> +               if (!orc) {
> +                       orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
> +                       return;
> +               }
> +
> +               orc_lookup[i] = orc - __start_orc_unwind;
> +       }
> +
> +       /* Initialize the ending block: */
> +       orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, LOOKUP_STOP_IP);
> +       if (!orc) {
> +               orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
> +               return;
> +       }
> +       orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
> +
> +       orc_init = true;
> +}
> +
> +static inline bool on_stack(struct stack_info *info, unsigned long addr, size_t len)
> +{
> +       unsigned long begin = info->begin;
> +       unsigned long end   = info->end;
> +
> +       return (info->type != STACK_TYPE_UNKNOWN &&
> +               addr >= begin && addr < end && addr + len > begin && addr + len <= end);
> +}
> +
> +static bool stack_access_ok(struct unwind_state *state, unsigned long addr, size_t len)
> +{
> +       struct stack_info *info = &state->stack_info;
> +
> +       if (on_stack(info, addr, len))
> +               return true;
> +
> +       return !get_stack_info(addr, state->task, info) && on_stack(info, addr, len);
> +}
> +
> +unsigned long unwind_get_return_address(struct unwind_state *state)
> +{
> +       return __unwind_get_return_address(state);
> +}
> +EXPORT_SYMBOL_GPL(unwind_get_return_address);
> +
> +void unwind_start(struct unwind_state *state, struct task_struct *task,
> +                   struct pt_regs *regs)
> +{
> +       __unwind_start(state, task, regs);
> +       state->type = UNWINDER_ORC;
> +       if (!unwind_done(state) && !__kernel_text_address(state->pc))
> +               unwind_next_frame(state);
> +}
> +EXPORT_SYMBOL_GPL(unwind_start);
> +
> +static bool is_entry_func(unsigned long addr)
> +{
> +       extern u32 kernel_entry;
> +       extern u32 kernel_entry_end;
> +
> +       return addr >= (unsigned long)&kernel_entry && addr < (unsigned long)&kernel_entry_end;
> +}
> +
> +static inline unsigned long bt_address(unsigned long ra)
> +{
> +       extern unsigned long eentry;
> +
> +       if (__kernel_text_address(ra))
> +               return ra;
> +
> +       if (__module_text_address(ra))
> +               return ra;
> +
> +       if (ra >= eentry && ra < eentry +  EXCCODE_INT_END * VECSIZE) {
> +               unsigned long func;
> +               unsigned long type = (ra - eentry) / VECSIZE;
> +               unsigned long offset = (ra - eentry) % VECSIZE;
> +
> +               switch (type) {
> +               case 0 ... EXCCODE_INT_START - 1:
> +                       func = (unsigned long)exception_table[type];
> +                       break;
> +               case EXCCODE_INT_START ... EXCCODE_INT_END:
> +                       func = (unsigned long)handle_vint;
> +                       break;
> +               default:
> +                       func = (unsigned long)handle_reserved;
> +                       break;
> +               }
> +
> +               return func + offset;
> +       }
> +
> +       return ra;
> +}
> +
> +bool unwind_next_frame(struct unwind_state *state)
> +{
> +       unsigned long *p, pc;
> +       struct pt_regs *regs;
> +       struct orc_entry *orc;
> +       struct stack_info *info = &state->stack_info;
> +
> +       if (unwind_done(state))
> +               return false;
> +
> +       /* Don't let modules unload while we're reading their ORC data. */
> +       preempt_disable();
> +
> +       if (is_entry_func(state->pc))
> +               goto end;
> +
> +       orc = orc_find(state->pc);
> +       if (!orc) {
> +               orc = &orc_fp_entry;
> +               state->error = true;
> +       }
> +
> +       switch (orc->sp_reg) {
> +       case ORC_REG_SP:
> +               if (info->type == STACK_TYPE_IRQ && state->sp == info->end)
> +                       orc->type = ORC_TYPE_REGS;
> +               else
> +                       state->sp = state->sp + orc->sp_offset;
> +               break;
> +       case ORC_REG_FP:
> +               state->sp = state->fp;
> +               break;
> +       default:
> +               orc_warn("unknown SP base reg %d at %pB\n", orc->sp_reg, (void *)state->pc);
> +               goto err;
> +       }
> +
> +       switch (orc->fp_reg) {
> +       case ORC_REG_PREV_SP:
> +               p = (unsigned long *)(state->sp + orc->fp_offset);
> +               if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long)))
> +                       goto err;
> +
> +               state->fp = *p;
> +               break;
> +       case ORC_REG_UNDEFINED:
> +               /* Nothing. */
> +               break;
> +       default:
> +               orc_warn("unknown FP base reg %d at %pB\n", orc->fp_reg, (void *)state->pc);
> +               goto err;
> +       }
> +
> +       switch (orc->type) {
> +       case ORC_TYPE_CALL:
> +               if (orc->ra_reg == ORC_REG_PREV_SP) {
> +                       p = (unsigned long *)(state->sp + orc->ra_offset);
> +                       if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long)))
> +                               goto err;
> +
> +                       pc = unwind_graph_addr(state, *p, state->sp);
> +                       pc -= LOONGARCH_INSN_SIZE;
> +               } else if (orc->ra_reg == ORC_REG_UNDEFINED) {
> +                       if (!state->ra || state->ra == state->pc)
> +                               goto err;
> +
> +                       pc = unwind_graph_addr(state, state->ra, state->sp);
> +                       pc -=  LOONGARCH_INSN_SIZE;
> +                       state->ra = 0;
> +               } else {
> +                       orc_warn("unknown ra base reg %d at %pB\n", orc->ra_reg, (void *)state->pc);
> +                       goto err;
> +               }
> +               break;
> +       case ORC_TYPE_REGS:
> +               if (info->type == STACK_TYPE_IRQ && state->sp == info->end)
> +                       regs = (struct pt_regs *)info->next_sp;
> +               else
> +                       regs = (struct pt_regs *)state->sp;
> +
> +               if (!stack_access_ok(state, (unsigned long)regs, sizeof(*regs)))
> +                       goto err;
> +
> +               if ((info->end == (unsigned long)regs + sizeof(*regs)) &&
> +                   !regs->regs[3] && !regs->regs[1])
> +                       goto end;
> +
> +               if (user_mode(regs))
> +                       goto end;
> +
> +               pc = regs->csr_era;
> +               if (!__kernel_text_address(pc))
> +                       goto err;
> +
> +               state->sp = regs->regs[3];
> +               state->ra = regs->regs[1];
> +               state->fp = regs->regs[22];
> +               get_stack_info(state->sp, state->task, info);
> +
> +               break;
> +       default:
> +               orc_warn("unknown .orc_unwind entry type %d at %pB\n", orc->type, (void *)state->pc);
> +               goto err;
> +       }
> +
> +       state->pc = bt_address(pc);
> +       if (!state->pc) {
> +               pr_err("cannot find unwind pc at %pK\n", (void *)pc);
> +               goto err;
> +       }
> +
> +       if (!__kernel_text_address(state->pc))
> +               goto err;
> +
> +       preempt_enable();
> +       return true;
> +
> +err:
> +       state->error = true;
> +
> +end:
> +       preempt_enable();
> +       state->stack_info.type = STACK_TYPE_UNKNOWN;
> +       return false;
> +}
> +EXPORT_SYMBOL_GPL(unwind_next_frame);
> diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
> index bb2ec86f37a8..eaa7a91162e6 100644
> --- a/arch/loongarch/kernel/vmlinux.lds.S
> +++ b/arch/loongarch/kernel/vmlinux.lds.S
> @@ -2,6 +2,7 @@
>  #include <linux/sizes.h>
>  #include <asm/asm-offsets.h>
>  #include <asm/thread_info.h>
> +#include <asm/orc_lookup.h>
>
>  #define PAGE_SIZE _PAGE_SIZE
>  #define RO_EXCEPTION_TABLE_ALIGN       4
> @@ -122,6 +123,8 @@ SECTIONS
>         }
>  #endif
>
> +       ORC_UNWIND_TABLE
> +
>         .sdata : {
>                 *(.sdata)
>         }
> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
> index 0ed9040307b7..9d49c3f6fff5 100644
> --- a/arch/loongarch/kvm/switch.S
> +++ b/arch/loongarch/kvm/switch.S
> @@ -8,7 +8,7 @@
>  #include <asm/asmmacro.h>
>  #include <asm/loongarch.h>
>  #include <asm/regdef.h>
> -#include <asm/stackframe.h>
> +#include <asm/unwind_hints.h>
>
>  #define HGPR_OFFSET(x)         (PT_R0 + 8*x)
>  #define GGPR_OFFSET(x)         (KVM_ARCH_GGPR + 8*x)
> @@ -112,6 +112,7 @@
>         .text
>         .cfi_sections   .debug_frame
>  SYM_CODE_START(kvm_exc_entry)
> +       UNWIND_HINT_UNDEFINED
>         csrwr   a2,   KVM_TEMP_KS
>         csrrd   a2,   KVM_VCPU_KS
>         addi.d  a2,   a2, KVM_VCPU_ARCH
> @@ -248,3 +249,7 @@ SYM_FUNC_END(kvm_restore_fpu)
>         .section ".rodata"
>  SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry)
>  SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
> +
> +#ifdef CONFIG_CPU_HAS_LBT
> +STACK_FRAME_NON_STANDARD kvm_restore_fpu
> +#endif
> diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
> index a77bf160bfc4..e3023d9a508c 100644
> --- a/arch/loongarch/lib/Makefile
> +++ b/arch/loongarch/lib/Makefile
> @@ -3,6 +3,8 @@
>  # Makefile for LoongArch-specific library files.
>  #
>
> +OBJECT_FILES_NON_STANDARD := y
> +
>  lib-y  += delay.o memset.o memcpy.o memmove.o \
>            clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
>
> diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
> index 2c0a411f23aa..f01172a8f4e9 100644
> --- a/arch/loongarch/mm/tlb.c
> +++ b/arch/loongarch/mm/tlb.c
> @@ -9,8 +9,9 @@
>  #include <linux/hugetlb.h>
>  #include <linux/export.h>
>
> -#include <asm/cpu.h>
>  #include <asm/bootinfo.h>
> +#include <asm/cpu.h>
> +#include <asm/exception.h>
>  #include <asm/mmu_context.h>
>  #include <asm/pgtable.h>
>  #include <asm/tlb.h>
> @@ -266,24 +267,20 @@ static void setup_tlb_handler(int cpu)
>         setup_ptwalker();
>         local_flush_tlb_all();
>
> +       if (cpu_has_ptw) {
> +               exception_table[EXCCODE_TLBI] = handle_tlb_load_ptw;
> +               exception_table[EXCCODE_TLBL] = handle_tlb_load_ptw;
> +               exception_table[EXCCODE_TLBS] = handle_tlb_store_ptw;
> +               exception_table[EXCCODE_TLBM] = handle_tlb_modify_ptw;
> +       }
> +
>         /* The tlb handlers are generated only once */
>         if (cpu == 0) {
>                 memcpy((void *)tlbrentry, handle_tlb_refill, 0x80);
>                 local_flush_icache_range(tlbrentry, tlbrentry + 0x80);
> -               if (!cpu_has_ptw) {
> -                       set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE);
> -                       set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE);
> -                       set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE);
> -                       set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE);
> -               } else {
> -                       set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE);
> -                       set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE);
> -                       set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE);
> -                       set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE);
> -               }
> -               set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
> -               set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
> -               set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
> +
> +               for (int i = EXCCODE_TLBL; i <= EXCCODE_TLBPE; i++)
> +                       set_handler(i * VECSIZE, exception_table[i], VECSIZE);
>         }
>  #ifdef CONFIG_NUMA
>         else {
> diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
> index d5d682f3d29f..a44387b838af 100644
> --- a/arch/loongarch/mm/tlbex.S
> +++ b/arch/loongarch/mm/tlbex.S
> @@ -18,6 +18,7 @@
>
>         .macro tlb_do_page_fault, write
>         SYM_CODE_START(tlb_do_page_fault_\write)
> +       UNWIND_HINT_UNDEFINED
>         SAVE_ALL
>         csrrd           a2, LOONGARCH_CSR_BADV
>         move            a0, sp
> @@ -32,6 +33,7 @@
>         tlb_do_page_fault 1
>
>  SYM_CODE_START(handle_tlb_protect)
> +       UNWIND_HINT_UNDEFINED
>         BACKUP_T0T1
>         SAVE_ALL
>         move            a0, sp
> @@ -44,6 +46,7 @@ SYM_CODE_START(handle_tlb_protect)
>  SYM_CODE_END(handle_tlb_protect)
>
>  SYM_CODE_START(handle_tlb_load)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, EXCEPTION_KS0
>         csrwr           t1, EXCEPTION_KS1
>         csrwr           ra, EXCEPTION_KS2
> @@ -190,6 +193,7 @@ nopage_tlb_load:
>  SYM_CODE_END(handle_tlb_load)
>
>  SYM_CODE_START(handle_tlb_load_ptw)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, LOONGARCH_CSR_KS0
>         csrwr           t1, LOONGARCH_CSR_KS1
>         la_abs          t0, tlb_do_page_fault_0
> @@ -197,6 +201,7 @@ SYM_CODE_START(handle_tlb_load_ptw)
>  SYM_CODE_END(handle_tlb_load_ptw)
>
>  SYM_CODE_START(handle_tlb_store)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, EXCEPTION_KS0
>         csrwr           t1, EXCEPTION_KS1
>         csrwr           ra, EXCEPTION_KS2
> @@ -346,6 +351,7 @@ nopage_tlb_store:
>  SYM_CODE_END(handle_tlb_store)
>
>  SYM_CODE_START(handle_tlb_store_ptw)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, LOONGARCH_CSR_KS0
>         csrwr           t1, LOONGARCH_CSR_KS1
>         la_abs          t0, tlb_do_page_fault_1
> @@ -353,6 +359,7 @@ SYM_CODE_START(handle_tlb_store_ptw)
>  SYM_CODE_END(handle_tlb_store_ptw)
>
>  SYM_CODE_START(handle_tlb_modify)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, EXCEPTION_KS0
>         csrwr           t1, EXCEPTION_KS1
>         csrwr           ra, EXCEPTION_KS2
> @@ -500,6 +507,7 @@ nopage_tlb_modify:
>  SYM_CODE_END(handle_tlb_modify)
>
>  SYM_CODE_START(handle_tlb_modify_ptw)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, LOONGARCH_CSR_KS0
>         csrwr           t1, LOONGARCH_CSR_KS1
>         la_abs          t0, tlb_do_page_fault_1
> @@ -507,6 +515,7 @@ SYM_CODE_START(handle_tlb_modify_ptw)
>  SYM_CODE_END(handle_tlb_modify_ptw)
>
>  SYM_CODE_START(handle_tlb_refill)
> +       UNWIND_HINT_UNDEFINED
>         csrwr           t0, LOONGARCH_CSR_TLBRSAVE
>         csrrd           t0, LOONGARCH_CSR_PGD
>         lddir           t0, t0, 3
> diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
> index c74c9921304f..7a4ad96522ac 100644
> --- a/arch/loongarch/vdso/Makefile
> +++ b/arch/loongarch/vdso/Makefile
> @@ -3,6 +3,7 @@
>
>  KASAN_SANITIZE := n
>  KCOV_INSTRUMENT := n
> +OBJECT_FILES_NON_STANDARD := y
>
>  # Include the generic Makefile to check the built vdso.
>  include $(srctree)/lib/vdso/Makefile
> diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> index bb1339c7057b..39f2d4a05208 100644
> --- a/include/linux/compiler.h
> +++ b/include/linux/compiler.h
> @@ -116,6 +116,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
>   */
>  #define __stringify_label(n) #n
>
> +#define __annotate_reachable(c) ({                                     \
> +       asm volatile(__stringify_label(c) ":\n\t"                       \
> +                       ".pushsection .discard.reachable\n\t"           \
> +                       ".long " __stringify_label(c) "b - .\n\t"       \
> +                       ".popsection\n\t");                             \
> +})
> +#define annotate_reachable() __annotate_reachable(__COUNTER__)
> +
>  #define __annotate_unreachable(c) ({                                   \
>         asm volatile(__stringify_label(c) ":\n\t"                       \
>                      ".pushsection .discard.unreachable\n\t"            \
> @@ -128,6 +136,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
>  #define __annotate_jump_table __section(".rodata..c_jump_table")
>
>  #else /* !CONFIG_OBJTOOL */
> +#define annotate_reachable()
>  #define annotate_unreachable()
>  #define __annotate_jump_table
>  #endif /* CONFIG_OBJTOOL */
> diff --git a/scripts/Makefile b/scripts/Makefile
> index 576cf64be667..e4cca53d2285 100644
> --- a/scripts/Makefile
> +++ b/scripts/Makefile
> @@ -31,9 +31,12 @@ HOSTLDLIBS_sign-file = $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null |
>
>  ifdef CONFIG_UNWINDER_ORC
>  ifeq ($(ARCH),x86_64)
> -ARCH := x86
> +SRCARCH := x86
>  endif
> -HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include
> +ifeq ($(ARCH),loongarch)
> +SRCARCH := loongarch
> +endif
> +HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/$(SRCARCH)/include
>  HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED
>  endif
>
> --
> 2.42.0
>
>
Re: [PATCH v5 8/8] LoongArch: Add ORC stack unwinder support
Posted by Tiezhu Yang 2 years ago

On 12/02/2023 09:45 PM, Huacai Chen wrote:
> Hi, Tiezhu,
>
> On Wed, Nov 29, 2023 at 9:07 PM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>>
>> The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
>> similar in concept to a DWARF unwinder. The difference is that the format
>> of the ORC data is much simpler than DWARF, which in turn allows the ORC
>> unwinder to be much simpler and faster.

...

>> diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
>> index 9c75120a26d8..4d6914f84c7e 100644
>> --- a/arch/loongarch/kernel/lbt.S
>> +++ b/arch/loongarch/kernel/lbt.S
>> @@ -11,6 +11,7 @@
>>  #include <asm/asm-offsets.h>
>>  #include <asm/errno.h>
>>  #include <asm/regdef.h>
>> +#include <asm/unwind_hints.h>
>>
>>  #define SCR_REG_WIDTH 8
>>
>> @@ -153,3 +154,7 @@ SYM_FUNC_END(_restore_ftop_context)
>>  .L_lbt_fault:
>>         li.w            a0, -EFAULT             # failure
>>         jr              ra
>> +
>> +#ifdef CONFIG_CPU_HAS_LBT
>> +STACK_FRAME_NON_STANDARD _restore_ftop_context
>> +#endif
> This file is only compiled if CONFIG_CPU_HAS_LBT is set, so #ifdef can
> be removed.

OK, will remove it in the next version.

Thanks,
Tiezhu