[PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue

Xianglai Li posted 2 patches 1 month, 2 weeks ago
[PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Xianglai Li 1 month, 2 weeks ago
Insert the appropriate UNWIND macro definitions into the kvm_exc_entry
assembly function to guide the generation of correct ORC table entries,
thereby solving the timeout problem when loading the livepatch-sample module
on a physical machine running multiple multi-vcpu virtual machines.

While solving the above problem, we gain an additional benefit:
we can obtain more complete call stack information.

Stack information that can be obtained before the problem is fixed:
[<0>] kvm_vcpu_block+0x88/0x120 [kvm]
[<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
[<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
[<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
[<0>] kvm_handle_exit+0x160/0x270 [kvm]
[<0>] kvm_exc_entry+0x100/0x1e0

Stack information that can be obtained after the problem is fixed:
[<0>] kvm_vcpu_block+0x88/0x120 [kvm]
[<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
[<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
[<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
[<0>] kvm_handle_exit+0x160/0x270 [kvm]
[<0>] kvm_exc_entry+0x104/0x1e4
[<0>] kvm_enter_guest+0x38/0x11c
[<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
[<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
[<0>] sys_ioctl+0x498/0xf00
[<0>] do_syscall+0x98/0x1d0
[<0>] handle_syscall+0xb8/0x158

Cc: stable@vger.kernel.org
Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
---
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
Cc: Bibo Mao <maobibo@loongson.cn>
Cc: Charlie Jenkins <charlie@rivosinc.com>
Cc: Xianglai Li <lixianglai@loongson.cn>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tiezhu Yang <yangtiezhu@loongson.cn>

 arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
index 93845ce53651..a3ea9567dbe5 100644
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -10,6 +10,7 @@
 #include <asm/loongarch.h>
 #include <asm/regdef.h>
 #include <asm/unwind_hints.h>
+#include <linux/kvm_types.h>
 
 #define HGPR_OFFSET(x)		(PT_R0 + 8*x)
 #define GGPR_OFFSET(x)		(KVM_ARCH_GGPR + 8*x)
@@ -110,9 +111,9 @@
 	 * need to copy world switch code to DMW area.
 	 */
 	.text
+	.p2align PAGE_SHIFT
 	.cfi_sections	.debug_frame
 SYM_CODE_START(kvm_exc_entry)
-	.p2align PAGE_SHIFT
 	UNWIND_HINT_UNDEFINED
 	csrwr	a2,   KVM_TEMP_KS
 	csrrd	a2,   KVM_VCPU_KS
@@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
 	/* restore per cpu register */
 	ld.d	u0, a2, KVM_ARCH_HPERCPU
 	addi.d	sp, sp, -PT_SIZE
+	UNWIND_HINT_REGS
 
 	/* Prepare handle exception */
 	or	a0, s0, zero
@@ -200,7 +202,7 @@ ret_to_host:
 	jr      ra
 
 SYM_CODE_END(kvm_exc_entry)
-EXPORT_SYMBOL(kvm_exc_entry)
+EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
 
 /*
  * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
@@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
 	/* Save host GPRs */
 	kvm_save_host_gpr a2
 
+	/*
+	 * The csr_era member variable of the pt_regs structure is required
+	 * for unwinding orc to perform stack traceback, so we need to put
+	 * pc into csr_era member variable here.
+	 */
+	pcaddi	t0, 0
+	st.d	t0, a2, PT_ERA
+
 	addi.d	a2, a1, KVM_VCPU_ARCH
 	st.d	sp, a2, KVM_ARCH_HSP
 	st.d	tp, a2, KVM_ARCH_HTP
@@ -225,7 +235,7 @@ SYM_FUNC_START(kvm_enter_guest)
 	csrwr	a1, KVM_VCPU_KS
 	kvm_switch_to_guest
 SYM_FUNC_END(kvm_enter_guest)
-EXPORT_SYMBOL(kvm_enter_guest)
+EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest)
 
 SYM_FUNC_START(kvm_save_fpu)
 	fpu_save_csr	a0 t1
@@ -233,7 +243,7 @@ SYM_FUNC_START(kvm_save_fpu)
 	fpu_save_cc	a0 t1 t2
 	jr              ra
 SYM_FUNC_END(kvm_save_fpu)
-EXPORT_SYMBOL(kvm_save_fpu)
+EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu)
 
 SYM_FUNC_START(kvm_restore_fpu)
 	fpu_restore_double a0 t1
@@ -241,7 +251,7 @@ SYM_FUNC_START(kvm_restore_fpu)
 	fpu_restore_cc	   a0 t1 t2
 	jr                 ra
 SYM_FUNC_END(kvm_restore_fpu)
-EXPORT_SYMBOL(kvm_restore_fpu)
+EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu)
 
 #ifdef CONFIG_CPU_HAS_LSX
 SYM_FUNC_START(kvm_save_lsx)
@@ -250,7 +260,7 @@ SYM_FUNC_START(kvm_save_lsx)
 	lsx_save_data   a0 t1
 	jr              ra
 SYM_FUNC_END(kvm_save_lsx)
-EXPORT_SYMBOL(kvm_save_lsx)
+EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx)
 
 SYM_FUNC_START(kvm_restore_lsx)
 	lsx_restore_data a0 t1
@@ -258,7 +268,7 @@ SYM_FUNC_START(kvm_restore_lsx)
 	fpu_restore_csr  a0 t1 t2
 	jr               ra
 SYM_FUNC_END(kvm_restore_lsx)
-EXPORT_SYMBOL(kvm_restore_lsx)
+EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx)
 #endif
 
 #ifdef CONFIG_CPU_HAS_LASX
@@ -268,7 +278,7 @@ SYM_FUNC_START(kvm_save_lasx)
 	lasx_save_data  a0 t1
 	jr              ra
 SYM_FUNC_END(kvm_save_lasx)
-EXPORT_SYMBOL(kvm_save_lasx)
+EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx)
 
 SYM_FUNC_START(kvm_restore_lasx)
 	lasx_restore_data a0 t1
@@ -276,7 +286,7 @@ SYM_FUNC_START(kvm_restore_lasx)
 	fpu_restore_csr   a0 t1 t2
 	jr                ra
 SYM_FUNC_END(kvm_restore_lasx)
-EXPORT_SYMBOL(kvm_restore_lasx)
+EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx)
 #endif
 
 #ifdef CONFIG_CPU_HAS_LBT
-- 
2.39.1
Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Bibo Mao 1 month, 1 week ago

On 2025/12/27 上午9:27, Xianglai Li wrote:
> Insert the appropriate UNWIND macro definition into the kvm_exc_entry in
> the assembly function to guide the generation of correct ORC table entries,
> thereby solving the timeout problem of loading the livepatch-sample module
> on a physical machine running multiple vcpus virtual machines.
> 
> While solving the above problems, we have gained an additional benefit,
> that is, we can obtain more call stack information
> 
> Stack information that can be obtained before the problem is fixed:
> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
> [<0>] kvm_exc_entry+0x100/0x1e0
> 
> Stack information that can be obtained after the problem is fixed:
> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
> [<0>] kvm_exc_entry+0x104/0x1e4
> [<0>] kvm_enter_guest+0x38/0x11c
> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
> [<0>] sys_ioctl+0x498/0xf00
> [<0>] do_syscall+0x98/0x1d0
> [<0>] handle_syscall+0xb8/0x158
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
> ---
> Cc: Huacai Chen <chenhuacai@kernel.org>
> Cc: WANG Xuerui <kernel@xen0n.name>
> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
> Cc: Bibo Mao <maobibo@loongson.cn>
> Cc: Charlie Jenkins <charlie@rivosinc.com>
> Cc: Xianglai Li <lixianglai@loongson.cn>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
> 
>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>   1 file changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
> index 93845ce53651..a3ea9567dbe5 100644
> --- a/arch/loongarch/kvm/switch.S
> +++ b/arch/loongarch/kvm/switch.S
> @@ -10,6 +10,7 @@
>   #include <asm/loongarch.h>
>   #include <asm/regdef.h>
>   #include <asm/unwind_hints.h>
> +#include <linux/kvm_types.h>
>   
>   #define HGPR_OFFSET(x)		(PT_R0 + 8*x)
>   #define GGPR_OFFSET(x)		(KVM_ARCH_GGPR + 8*x)
> @@ -110,9 +111,9 @@
>   	 * need to copy world switch code to DMW area.
>   	 */
>   	.text
> +	.p2align PAGE_SHIFT
>   	.cfi_sections	.debug_frame
>   SYM_CODE_START(kvm_exc_entry)
> -	.p2align PAGE_SHIFT
>   	UNWIND_HINT_UNDEFINED
>   	csrwr	a2,   KVM_TEMP_KS
>   	csrrd	a2,   KVM_VCPU_KS
> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>   	/* restore per cpu register */
>   	ld.d	u0, a2, KVM_ARCH_HPERCPU
>   	addi.d	sp, sp, -PT_SIZE
> +	UNWIND_HINT_REGS
>   
>   	/* Prepare handle exception */
>   	or	a0, s0, zero
> @@ -200,7 +202,7 @@ ret_to_host:
>   	jr      ra
>   
>   SYM_CODE_END(kvm_exc_entry)
> -EXPORT_SYMBOL(kvm_exc_entry)
> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>   
>   /*
>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>   	/* Save host GPRs */
>   	kvm_save_host_gpr a2
>   
> +	/*
> +	 * The csr_era member variable of the pt_regs structure is required
> +	 * for unwinding orc to perform stack traceback, so we need to put
> +	 * pc into csr_era member variable here.
> +	 */
> +	pcaddi	t0, 0
> +	st.d	t0, a2, PT_ERA
maybe PRMD need be set with fake pt_regs also, something like this:
         ori     t0, zero, CSR_PRMD_PIE
         st.d	t0, a2, PT_PRMD

Regards
Bibo Mao
> +
>   	addi.d	a2, a1, KVM_VCPU_ARCH
>   	st.d	sp, a2, KVM_ARCH_HSP
>   	st.d	tp, a2, KVM_ARCH_HTP
> @@ -225,7 +235,7 @@ SYM_FUNC_START(kvm_enter_guest)
>   	csrwr	a1, KVM_VCPU_KS
>   	kvm_switch_to_guest
>   SYM_FUNC_END(kvm_enter_guest)
> -EXPORT_SYMBOL(kvm_enter_guest)
> +EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest)
>   
>   SYM_FUNC_START(kvm_save_fpu)
>   	fpu_save_csr	a0 t1
> @@ -233,7 +243,7 @@ SYM_FUNC_START(kvm_save_fpu)
>   	fpu_save_cc	a0 t1 t2
>   	jr              ra
>   SYM_FUNC_END(kvm_save_fpu)
> -EXPORT_SYMBOL(kvm_save_fpu)
> +EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu)
>   
>   SYM_FUNC_START(kvm_restore_fpu)
>   	fpu_restore_double a0 t1
> @@ -241,7 +251,7 @@ SYM_FUNC_START(kvm_restore_fpu)
>   	fpu_restore_cc	   a0 t1 t2
>   	jr                 ra
>   SYM_FUNC_END(kvm_restore_fpu)
> -EXPORT_SYMBOL(kvm_restore_fpu)
> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu)
>   
>   #ifdef CONFIG_CPU_HAS_LSX
>   SYM_FUNC_START(kvm_save_lsx)
> @@ -250,7 +260,7 @@ SYM_FUNC_START(kvm_save_lsx)
>   	lsx_save_data   a0 t1
>   	jr              ra
>   SYM_FUNC_END(kvm_save_lsx)
> -EXPORT_SYMBOL(kvm_save_lsx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx)
>   
>   SYM_FUNC_START(kvm_restore_lsx)
>   	lsx_restore_data a0 t1
> @@ -258,7 +268,7 @@ SYM_FUNC_START(kvm_restore_lsx)
>   	fpu_restore_csr  a0 t1 t2
>   	jr               ra
>   SYM_FUNC_END(kvm_restore_lsx)
> -EXPORT_SYMBOL(kvm_restore_lsx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx)
>   #endif
>   
>   #ifdef CONFIG_CPU_HAS_LASX
> @@ -268,7 +278,7 @@ SYM_FUNC_START(kvm_save_lasx)
>   	lasx_save_data  a0 t1
>   	jr              ra
>   SYM_FUNC_END(kvm_save_lasx)
> -EXPORT_SYMBOL(kvm_save_lasx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx)
>   
>   SYM_FUNC_START(kvm_restore_lasx)
>   	lasx_restore_data a0 t1
> @@ -276,7 +286,7 @@ SYM_FUNC_START(kvm_restore_lasx)
>   	fpu_restore_csr   a0 t1 t2
>   	jr                ra
>   SYM_FUNC_END(kvm_restore_lasx)
> -EXPORT_SYMBOL(kvm_restore_lasx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx)
>   #endif
>   
>   #ifdef CONFIG_CPU_HAS_LBT
> 

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by lixianglai 1 month, 1 week ago
Hi Bibo Mao:
>
>
> On 2025/12/27 上午9:27, Xianglai Li wrote:
>> Insert the appropriate UNWIND macro definition into the kvm_exc_entry in
>> the assembly function to guide the generation of correct ORC table 
>> entries,
>> thereby solving the timeout problem of loading the livepatch-sample 
>> module
>> on a physical machine running multiple vcpus virtual machines.
>>
>> While solving the above problems, we have gained an additional benefit,
>> that is, we can obtain more call stack information
>>
>> Stack information that can be obtained before the problem is fixed:
>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>> [<0>] kvm_exc_entry+0x100/0x1e0
>>
>> Stack information that can be obtained after the problem is fixed:
>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>> [<0>] kvm_exc_entry+0x104/0x1e4
>> [<0>] kvm_enter_guest+0x38/0x11c
>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>> [<0>] sys_ioctl+0x498/0xf00
>> [<0>] do_syscall+0x98/0x1d0
>> [<0>] handle_syscall+0xb8/0x158
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>> ---
>> Cc: Huacai Chen <chenhuacai@kernel.org>
>> Cc: WANG Xuerui <kernel@xen0n.name>
>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>> Cc: Bibo Mao <maobibo@loongson.cn>
>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>> Cc: Xianglai Li <lixianglai@loongson.cn>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>
>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>> index 93845ce53651..a3ea9567dbe5 100644
>> --- a/arch/loongarch/kvm/switch.S
>> +++ b/arch/loongarch/kvm/switch.S
>> @@ -10,6 +10,7 @@
>>   #include <asm/loongarch.h>
>>   #include <asm/regdef.h>
>>   #include <asm/unwind_hints.h>
>> +#include <linux/kvm_types.h>
>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>> @@ -110,9 +111,9 @@
>>        * need to copy world switch code to DMW area.
>>        */
>>       .text
>> +    .p2align PAGE_SHIFT
>>       .cfi_sections    .debug_frame
>>   SYM_CODE_START(kvm_exc_entry)
>> -    .p2align PAGE_SHIFT
>>       UNWIND_HINT_UNDEFINED
>>       csrwr    a2,   KVM_TEMP_KS
>>       csrrd    a2,   KVM_VCPU_KS
>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>       /* restore per cpu register */
>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>       addi.d    sp, sp, -PT_SIZE
>> +    UNWIND_HINT_REGS
>>         /* Prepare handle exception */
>>       or    a0, s0, zero
>> @@ -200,7 +202,7 @@ ret_to_host:
>>       jr      ra
>>     SYM_CODE_END(kvm_exc_entry)
>> -EXPORT_SYMBOL(kvm_exc_entry)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>     /*
>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>       /* Save host GPRs */
>>       kvm_save_host_gpr a2
>>   +    /*
>> +     * The csr_era member variable of the pt_regs structure is required
>> +     * for unwinding orc to perform stack traceback, so we need to put
>> +     * pc into csr_era member variable here.
>> +     */
>> +    pcaddi    t0, 0
>> +    st.d    t0, a2, PT_ERA
> maybe PRMD need be set with fake pt_regs also, something like this:
>         ori     t0, zero, CSR_PRMD_PIE
>         st.d    t0, a2, PT_PRMD
>
Ok, I will fix it in the next version

Thanks,
Xianglai.
> Regards
> Bibo Mao
>> +
>>       addi.d    a2, a1, KVM_VCPU_ARCH
>>       st.d    sp, a2, KVM_ARCH_HSP
>>       st.d    tp, a2, KVM_ARCH_HTP
>> @@ -225,7 +235,7 @@ SYM_FUNC_START(kvm_enter_guest)
>>       csrwr    a1, KVM_VCPU_KS
>>       kvm_switch_to_guest
>>   SYM_FUNC_END(kvm_enter_guest)
>> -EXPORT_SYMBOL(kvm_enter_guest)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest)
>>     SYM_FUNC_START(kvm_save_fpu)
>>       fpu_save_csr    a0 t1
>> @@ -233,7 +243,7 @@ SYM_FUNC_START(kvm_save_fpu)
>>       fpu_save_cc    a0 t1 t2
>>       jr              ra
>>   SYM_FUNC_END(kvm_save_fpu)
>> -EXPORT_SYMBOL(kvm_save_fpu)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu)
>>     SYM_FUNC_START(kvm_restore_fpu)
>>       fpu_restore_double a0 t1
>> @@ -241,7 +251,7 @@ SYM_FUNC_START(kvm_restore_fpu)
>>       fpu_restore_cc       a0 t1 t2
>>       jr                 ra
>>   SYM_FUNC_END(kvm_restore_fpu)
>> -EXPORT_SYMBOL(kvm_restore_fpu)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu)
>>     #ifdef CONFIG_CPU_HAS_LSX
>>   SYM_FUNC_START(kvm_save_lsx)
>> @@ -250,7 +260,7 @@ SYM_FUNC_START(kvm_save_lsx)
>>       lsx_save_data   a0 t1
>>       jr              ra
>>   SYM_FUNC_END(kvm_save_lsx)
>> -EXPORT_SYMBOL(kvm_save_lsx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx)
>>     SYM_FUNC_START(kvm_restore_lsx)
>>       lsx_restore_data a0 t1
>> @@ -258,7 +268,7 @@ SYM_FUNC_START(kvm_restore_lsx)
>>       fpu_restore_csr  a0 t1 t2
>>       jr               ra
>>   SYM_FUNC_END(kvm_restore_lsx)
>> -EXPORT_SYMBOL(kvm_restore_lsx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx)
>>   #endif
>>     #ifdef CONFIG_CPU_HAS_LASX
>> @@ -268,7 +278,7 @@ SYM_FUNC_START(kvm_save_lasx)
>>       lasx_save_data  a0 t1
>>       jr              ra
>>   SYM_FUNC_END(kvm_save_lasx)
>> -EXPORT_SYMBOL(kvm_save_lasx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx)
>>     SYM_FUNC_START(kvm_restore_lasx)
>>       lasx_restore_data a0 t1
>> @@ -276,7 +286,7 @@ SYM_FUNC_START(kvm_restore_lasx)
>>       fpu_restore_csr   a0 t1 t2
>>       jr                ra
>>   SYM_FUNC_END(kvm_restore_lasx)
>> -EXPORT_SYMBOL(kvm_restore_lasx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx)
>>   #endif
>>     #ifdef CONFIG_CPU_HAS_LBT
>>

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Jinyang He 1 month, 1 week ago
On 2025-12-27 09:27, Xianglai Li wrote:

> Insert the appropriate UNWIND macro definition into the kvm_exc_entry in
> the assembly function to guide the generation of correct ORC table entries,
> thereby solving the timeout problem of loading the livepatch-sample module
> on a physical machine running multiple vcpus virtual machines.
>
> While solving the above problems, we have gained an additional benefit,
> that is, we can obtain more call stack information
>
> Stack information that can be obtained before the problem is fixed:
> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
> [<0>] kvm_exc_entry+0x100/0x1e0
>
> Stack information that can be obtained after the problem is fixed:
> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
> [<0>] kvm_exc_entry+0x104/0x1e4
> [<0>] kvm_enter_guest+0x38/0x11c
> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
> [<0>] sys_ioctl+0x498/0xf00
> [<0>] do_syscall+0x98/0x1d0
> [<0>] handle_syscall+0xb8/0x158
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
> ---
> Cc: Huacai Chen <chenhuacai@kernel.org>
> Cc: WANG Xuerui <kernel@xen0n.name>
> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
> Cc: Bibo Mao <maobibo@loongson.cn>
> Cc: Charlie Jenkins <charlie@rivosinc.com>
> Cc: Xianglai Li <lixianglai@loongson.cn>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>
>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>   1 file changed, 19 insertions(+), 9 deletions(-)
>
> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
> index 93845ce53651..a3ea9567dbe5 100644
> --- a/arch/loongarch/kvm/switch.S
> +++ b/arch/loongarch/kvm/switch.S
> @@ -10,6 +10,7 @@
>   #include <asm/loongarch.h>
>   #include <asm/regdef.h>
>   #include <asm/unwind_hints.h>
> +#include <linux/kvm_types.h>
>   
>   #define HGPR_OFFSET(x)		(PT_R0 + 8*x)
>   #define GGPR_OFFSET(x)		(KVM_ARCH_GGPR + 8*x)
> @@ -110,9 +111,9 @@
>   	 * need to copy world switch code to DMW area.
>   	 */
>   	.text
> +	.p2align PAGE_SHIFT
>   	.cfi_sections	.debug_frame
>   SYM_CODE_START(kvm_exc_entry)
> -	.p2align PAGE_SHIFT
>   	UNWIND_HINT_UNDEFINED
>   	csrwr	a2,   KVM_TEMP_KS
>   	csrrd	a2,   KVM_VCPU_KS
> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>   	/* restore per cpu register */
>   	ld.d	u0, a2, KVM_ARCH_HPERCPU
>   	addi.d	sp, sp, -PT_SIZE
> +	UNWIND_HINT_REGS
>   
>   	/* Prepare handle exception */
>   	or	a0, s0, zero
> @@ -200,7 +202,7 @@ ret_to_host:
>   	jr      ra
>   
>   SYM_CODE_END(kvm_exc_entry)
> -EXPORT_SYMBOL(kvm_exc_entry)
> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>   
>   /*
>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>   	/* Save host GPRs */
>   	kvm_save_host_gpr a2
>   
> +	/*
> +	 * The csr_era member variable of the pt_regs structure is required
> +	 * for unwinding orc to perform stack traceback, so we need to put
> +	 * pc into csr_era member variable here.
> +	 */
> +	pcaddi	t0, 0
> +	st.d	t0, a2, PT_ERA
Hi, Xianglai,

It should use `SYM_CODE_START` to mark `kvm_enter_guest` rather than
`SYM_FUNC_START`, since `SYM_FUNC_START` is used to mark "C-like"
asm functions. I guess kvm_enter_guest is something like an exception
handler because the last instruction is "ertn". So usually it should
mark UNWIND_HINT_REGS where the last frame info can be found via "$sp".
However, all info is stored to "$a2", so this mark should be
   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
I don't know why this function's internal PC is saved here by `pcaddi t0, 0`,
and I think it has no meaning (for exception handlers, the last PC is saved
by reading CSR.ERA). `kvm_enter_guest` saves registers via
"$a2" ("$sp" - PT_REGS) beyond the stack ("$sp"), which is dangerous if IE
is enabled. So I wonder if there is really a stacktrace through this
function?

Jinyang


> +
>   	addi.d	a2, a1, KVM_VCPU_ARCH
>   	st.d	sp, a2, KVM_ARCH_HSP
>   	st.d	tp, a2, KVM_ARCH_HTP
> @@ -225,7 +235,7 @@ SYM_FUNC_START(kvm_enter_guest)
>   	csrwr	a1, KVM_VCPU_KS
>   	kvm_switch_to_guest
>   SYM_FUNC_END(kvm_enter_guest)
> -EXPORT_SYMBOL(kvm_enter_guest)
> +EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest)
>   
>   SYM_FUNC_START(kvm_save_fpu)
>   	fpu_save_csr	a0 t1
> @@ -233,7 +243,7 @@ SYM_FUNC_START(kvm_save_fpu)
>   	fpu_save_cc	a0 t1 t2
>   	jr              ra
>   SYM_FUNC_END(kvm_save_fpu)
> -EXPORT_SYMBOL(kvm_save_fpu)
> +EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu)
>   
>   SYM_FUNC_START(kvm_restore_fpu)
>   	fpu_restore_double a0 t1
> @@ -241,7 +251,7 @@ SYM_FUNC_START(kvm_restore_fpu)
>   	fpu_restore_cc	   a0 t1 t2
>   	jr                 ra
>   SYM_FUNC_END(kvm_restore_fpu)
> -EXPORT_SYMBOL(kvm_restore_fpu)
> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu)
>   
>   #ifdef CONFIG_CPU_HAS_LSX
>   SYM_FUNC_START(kvm_save_lsx)
> @@ -250,7 +260,7 @@ SYM_FUNC_START(kvm_save_lsx)
>   	lsx_save_data   a0 t1
>   	jr              ra
>   SYM_FUNC_END(kvm_save_lsx)
> -EXPORT_SYMBOL(kvm_save_lsx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx)
>   
>   SYM_FUNC_START(kvm_restore_lsx)
>   	lsx_restore_data a0 t1
> @@ -258,7 +268,7 @@ SYM_FUNC_START(kvm_restore_lsx)
>   	fpu_restore_csr  a0 t1 t2
>   	jr               ra
>   SYM_FUNC_END(kvm_restore_lsx)
> -EXPORT_SYMBOL(kvm_restore_lsx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx)
>   #endif
>   
>   #ifdef CONFIG_CPU_HAS_LASX
> @@ -268,7 +278,7 @@ SYM_FUNC_START(kvm_save_lasx)
>   	lasx_save_data  a0 t1
>   	jr              ra
>   SYM_FUNC_END(kvm_save_lasx)
> -EXPORT_SYMBOL(kvm_save_lasx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx)
>   
>   SYM_FUNC_START(kvm_restore_lasx)
>   	lasx_restore_data a0 t1
> @@ -276,7 +286,7 @@ SYM_FUNC_START(kvm_restore_lasx)
>   	fpu_restore_csr   a0 t1 t2
>   	jr                ra
>   SYM_FUNC_END(kvm_restore_lasx)
> -EXPORT_SYMBOL(kvm_restore_lasx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx)
>   #endif
>   
>   #ifdef CONFIG_CPU_HAS_LBT

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by lixianglai 1 month, 1 week ago
Hi Jinyang:
> On 2025-12-27 09:27, Xianglai Li wrote:
>
>> Insert the appropriate UNWIND macro definition into the kvm_exc_entry in
>> the assembly function to guide the generation of correct ORC table 
>> entries,
>> thereby solving the timeout problem of loading the livepatch-sample 
>> module
>> on a physical machine running multiple vcpus virtual machines.
>>
>> While solving the above problems, we have gained an additional benefit,
>> that is, we can obtain more call stack information
>>
>> Stack information that can be obtained before the problem is fixed:
>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>> [<0>] kvm_exc_entry+0x100/0x1e0
>>
>> Stack information that can be obtained after the problem is fixed:
>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>> [<0>] kvm_exc_entry+0x104/0x1e4
>> [<0>] kvm_enter_guest+0x38/0x11c
>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>> [<0>] sys_ioctl+0x498/0xf00
>> [<0>] do_syscall+0x98/0x1d0
>> [<0>] handle_syscall+0xb8/0x158
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>> ---
>> Cc: Huacai Chen <chenhuacai@kernel.org>
>> Cc: WANG Xuerui <kernel@xen0n.name>
>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>> Cc: Bibo Mao <maobibo@loongson.cn>
>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>> Cc: Xianglai Li <lixianglai@loongson.cn>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>
>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>> index 93845ce53651..a3ea9567dbe5 100644
>> --- a/arch/loongarch/kvm/switch.S
>> +++ b/arch/loongarch/kvm/switch.S
>> @@ -10,6 +10,7 @@
>>   #include <asm/loongarch.h>
>>   #include <asm/regdef.h>
>>   #include <asm/unwind_hints.h>
>> +#include <linux/kvm_types.h>
>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>> @@ -110,9 +111,9 @@
>>        * need to copy world switch code to DMW area.
>>        */
>>       .text
>> +    .p2align PAGE_SHIFT
>>       .cfi_sections    .debug_frame
>>   SYM_CODE_START(kvm_exc_entry)
>> -    .p2align PAGE_SHIFT
>>       UNWIND_HINT_UNDEFINED
>>       csrwr    a2,   KVM_TEMP_KS
>>       csrrd    a2,   KVM_VCPU_KS
>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>       /* restore per cpu register */
>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>       addi.d    sp, sp, -PT_SIZE
>> +    UNWIND_HINT_REGS
>>         /* Prepare handle exception */
>>       or    a0, s0, zero
>> @@ -200,7 +202,7 @@ ret_to_host:
>>       jr      ra
>>     SYM_CODE_END(kvm_exc_entry)
>> -EXPORT_SYMBOL(kvm_exc_entry)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>     /*
>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>       /* Save host GPRs */
>>       kvm_save_host_gpr a2
>>   +    /*
>> +     * The csr_era member variable of the pt_regs structure is required
>> +     * for unwinding orc to perform stack traceback, so we need to put
>> +     * pc into csr_era member variable here.
>> +     */
>> +    pcaddi    t0, 0
>> +    st.d    t0, a2, PT_ERA
> Hi, Xianglai,
>
> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` rather than
> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark "C-likely"
> asm functionw. 

Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the next version.

> I guess the kvm_enter_guest is something like exception
> handler becuase the last instruction is "ertn". So usually it should
> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
> However, all info is store to "$a2", this mark should be
>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
> I don't konw why save this function internal PC here by `pcaddi t0, 0`,
> and I think it is no meaning(, for exception handler, they save last PC
> by read CSR.ERA). The `kvm_enter_guest` saves registers by
> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
> is enable. So I wonder if there is really a stacktrace through this 
> function?
>
The stack backtracking issue in switch.S is rather complex because it 
involves the switching between cpu root-mode and guest-mode:
Real stack backtracking should be divided into two parts:
part 1:
     [<0>] kvm_enter_guest+0x38/0x11c
     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
     [<0>] sys_ioctl+0x498/0xf00
     [<0>] do_syscall+0x98/0x1d0
     [<0>] handle_syscall+0xb8/0x158

part 2:
     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
     [<0>] kvm_exc_entry+0x104/0x1e4


In "part 1", after executing kvm_enter_guest, the cpu switches from 
root-mode to guest-mode.
In this case, stack backtracking is indeed very rare.

In "part 2", the cpu switches from the guest-mode to the root-mode,
and most of the stack backtracking occurs during this phase.

To obtain the longest call chain, we save the pc in kvm_enter_guest to
pt_regs.csr_era,
and after restoring the sp of the root-mode cpu in kvm_exc_entry,
the ORC entry is re-established using "UNWIND_HINT_REGS",
and then we obtain the following stack backtrace as we wanted:

     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
     [<0>] kvm_exc_entry+0x104/0x1e4
     [<0>] kvm_enter_guest+0x38/0x11c
     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
     [<0>] sys_ioctl+0x498/0xf00
     [<0>] do_syscall+0x98/0x1d0
     [<0>] handle_syscall+0xb8/0x158

Doing so is equivalent to ignoring the details of the cpu root-mode and
guest-mode switching.
Regarding your point that this is dangerous while IE is enabled:
interrupts are always off during the cpu root-mode and guest-mode
switching in kvm_enter_guest and kvm_exc_entry.

Thanks!
Xianglai.

> Jinyang
>
>
>> +
>>       addi.d    a2, a1, KVM_VCPU_ARCH
>>       st.d    sp, a2, KVM_ARCH_HSP
>>       st.d    tp, a2, KVM_ARCH_HTP
>> @@ -225,7 +235,7 @@ SYM_FUNC_START(kvm_enter_guest)
>>       csrwr    a1, KVM_VCPU_KS
>>       kvm_switch_to_guest
>>   SYM_FUNC_END(kvm_enter_guest)
>> -EXPORT_SYMBOL(kvm_enter_guest)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest)
>>     SYM_FUNC_START(kvm_save_fpu)
>>       fpu_save_csr    a0 t1
>> @@ -233,7 +243,7 @@ SYM_FUNC_START(kvm_save_fpu)
>>       fpu_save_cc    a0 t1 t2
>>       jr              ra
>>   SYM_FUNC_END(kvm_save_fpu)
>> -EXPORT_SYMBOL(kvm_save_fpu)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu)
>>     SYM_FUNC_START(kvm_restore_fpu)
>>       fpu_restore_double a0 t1
>> @@ -241,7 +251,7 @@ SYM_FUNC_START(kvm_restore_fpu)
>>       fpu_restore_cc       a0 t1 t2
>>       jr                 ra
>>   SYM_FUNC_END(kvm_restore_fpu)
>> -EXPORT_SYMBOL(kvm_restore_fpu)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu)
>>     #ifdef CONFIG_CPU_HAS_LSX
>>   SYM_FUNC_START(kvm_save_lsx)
>> @@ -250,7 +260,7 @@ SYM_FUNC_START(kvm_save_lsx)
>>       lsx_save_data   a0 t1
>>       jr              ra
>>   SYM_FUNC_END(kvm_save_lsx)
>> -EXPORT_SYMBOL(kvm_save_lsx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx)
>>     SYM_FUNC_START(kvm_restore_lsx)
>>       lsx_restore_data a0 t1
>> @@ -258,7 +268,7 @@ SYM_FUNC_START(kvm_restore_lsx)
>>       fpu_restore_csr  a0 t1 t2
>>       jr               ra
>>   SYM_FUNC_END(kvm_restore_lsx)
>> -EXPORT_SYMBOL(kvm_restore_lsx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx)
>>   #endif
>>     #ifdef CONFIG_CPU_HAS_LASX
>> @@ -268,7 +278,7 @@ SYM_FUNC_START(kvm_save_lasx)
>>       lasx_save_data  a0 t1
>>       jr              ra
>>   SYM_FUNC_END(kvm_save_lasx)
>> -EXPORT_SYMBOL(kvm_save_lasx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx)
>>     SYM_FUNC_START(kvm_restore_lasx)
>>       lasx_restore_data a0 t1
>> @@ -276,7 +286,7 @@ SYM_FUNC_START(kvm_restore_lasx)
>>       fpu_restore_csr   a0 t1 t2
>>       jr                ra
>>   SYM_FUNC_END(kvm_restore_lasx)
>> -EXPORT_SYMBOL(kvm_restore_lasx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx)
>>   #endif
>>     #ifdef CONFIG_CPU_HAS_LBT

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Jinyang He 1 month, 1 week ago
On 2025-12-29 11:53, lixianglai wrote:
> Hi Jinyang:
>> On 2025-12-27 09:27, Xianglai Li wrote:
>>
>>> Insert the appropriate UNWIND macro definition into the 
>>> kvm_exc_entry in
>>> the assembly function to guide the generation of correct ORC table 
>>> entries,
>>> thereby solving the timeout problem of loading the livepatch-sample 
>>> module
>>> on a physical machine running multiple vcpus virtual machines.
>>>
>>> While solving the above problems, we have gained an additional benefit,
>>> that is, we can obtain more call stack information
>>>
>>> Stack information that can be obtained before the problem is fixed:
>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>> [<0>] kvm_exc_entry+0x100/0x1e0
>>>
>>> Stack information that can be obtained after the problem is fixed:
>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>> [<0>] kvm_exc_entry+0x104/0x1e4
>>> [<0>] kvm_enter_guest+0x38/0x11c
>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>> [<0>] sys_ioctl+0x498/0xf00
>>> [<0>] do_syscall+0x98/0x1d0
>>> [<0>] handle_syscall+0xb8/0x158
>>>
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>>> ---
>>> Cc: Huacai Chen <chenhuacai@kernel.org>
>>> Cc: WANG Xuerui <kernel@xen0n.name>
>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>>> Cc: Bibo Mao <maobibo@loongson.cn>
>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>>> Cc: Xianglai Li <lixianglai@loongson.cn>
>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>>
>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>>> index 93845ce53651..a3ea9567dbe5 100644
>>> --- a/arch/loongarch/kvm/switch.S
>>> +++ b/arch/loongarch/kvm/switch.S
>>> @@ -10,6 +10,7 @@
>>>   #include <asm/loongarch.h>
>>>   #include <asm/regdef.h>
>>>   #include <asm/unwind_hints.h>
>>> +#include <linux/kvm_types.h>
>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>> @@ -110,9 +111,9 @@
>>>        * need to copy world switch code to DMW area.
>>>        */
>>>       .text
>>> +    .p2align PAGE_SHIFT
>>>       .cfi_sections    .debug_frame
>>>   SYM_CODE_START(kvm_exc_entry)
>>> -    .p2align PAGE_SHIFT
>>>       UNWIND_HINT_UNDEFINED
>>>       csrwr    a2,   KVM_TEMP_KS
>>>       csrrd    a2,   KVM_VCPU_KS
>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>>       /* restore per cpu register */
>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>       addi.d    sp, sp, -PT_SIZE
>>> +    UNWIND_HINT_REGS
>>>         /* Prepare handle exception */
>>>       or    a0, s0, zero
>>> @@ -200,7 +202,7 @@ ret_to_host:
>>>       jr      ra
>>>     SYM_CODE_END(kvm_exc_entry)
>>> -EXPORT_SYMBOL(kvm_exc_entry)
>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>>     /*
>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>>       /* Save host GPRs */
>>>       kvm_save_host_gpr a2
>>>   +    /*
>>> +     * The csr_era member variable of the pt_regs structure is 
>>> required
>>> +     * for unwinding orc to perform stack traceback, so we need to put
>>> +     * pc into csr_era member variable here.
>>> +     */
>>> +    pcaddi    t0, 0
>>> +    st.d    t0, a2, PT_ERA
>> Hi, Xianglai,
>>
>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` rather than
>> `SYM_FUNC_START`, since `SYM_FUNC_START` is used to mark "C-like"
>> asm functions. 
>
> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the next 
> version.
>
>> I guess the kvm_enter_guest is something like an exception
>> handler because the last instruction is "ertn". So usually it should
>> mark UNWIND_HINT_REGS where the last frame info can be found via "$sp".
>> However, all info is stored via "$a2", so this mark should be
>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
>> I don't know why this function's internal PC is saved here by `pcaddi t0, 0`,
>> and I think it is meaningless (for an exception handler, the last PC is
>> saved by reading CSR.ERA). The `kvm_enter_guest` saves registers via
>> "$a2" ("$sp" - PT_REGS), beyond the stack ("$sp"); it is dangerous if IE
>> is enabled. So I wonder if there is really a stacktrace through this 
>> function?
>>
> The stack backtracking issue in switch.S is rather complex because it 
> involves the switching between cpu root-mode and guest-mode:
> Real stack backtracking should be divided into two parts:
> part 1:
>     [<0>] kvm_enter_guest+0x38/0x11c
>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>     [<0>] sys_ioctl+0x498/0xf00
>     [<0>] do_syscall+0x98/0x1d0
>     [<0>] handle_syscall+0xb8/0x158
>
> part 2:
>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>     [<0>] kvm_exc_entry+0x104/0x1e4
>
>
> In "part 1", after executing kvm_enter_guest, the cpu switches from 
> root-mode to guest-mode.
> In this case, stack backtracking is indeed very rare.
>
> In "part 2", the cpu switches from the guest-mode to the root-mode,
> and most of the stack backtracking occurs during this phase.
>
> To obtain the longest call chain, we save pc in kvm_enter_guest to 
> pt_regs.csr_era,
> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
> The ORC entry was re-established using "UNWIND_HINT_REGS",
>  and then we obtained the following stack backtrace as we wanted:
>
>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>     [<0>] kvm_exc_entry+0x104/0x1e4
I found this might be a coincidence—correct behavior due to the incorrect
UNWIND_HINT_REGS mark and unusual stack adjustment.

First, the kvm_enter_guest contains only a single branch instruction, ertn.
It hardware-jump to the CSR.ERA address directly, jump into kvm_exc_entry.

At this point, the stack layout looks like this:
-------------------------------
   frame from call to `kvm_enter_guest`
-------------------------------  <- $sp
   PT_REGS
-------------------------------  <- $a2

Then kvm_exc_entry adjusts the stack without saving any register (e.g. $ra, $sp)
but is still marked UNWIND_HINT_REGS.
After the adjustment:
-------------------------------
   frame from call to `kvm_enter_guest`
-------------------------------
   PT_REGS
-------------------------------  <- $a2, new $sp

During unwinding, when the unwinder reaches kvm_exc_entry,
it meets the mark of PT_REGS and correctly recovers
  pc = regs.csr_era, sp = regs.sp, ra = regs.ra

a) Can we avoid "ertn" and use `jr reg` (or `jirl ra, reg, 0`)
instead, like a call? The kvm_exc_entry cannot go back to kvm_enter_guest
if we use "ertn", so should the kvm_enter_guest appear on the stacktrace?

b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
like ret_from_kernel_thread_asm.

> [<0>] kvm_enter_guest+0x38/0x11c
>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>     [<0>] sys_ioctl+0x498/0xf00
>     [<0>] do_syscall+0x98/0x1d0
>     [<0>] handle_syscall+0xb8/0x158
>
> Doing so is equivalent to ignoring the details of the cpu root-mode 
> and guest-mode switching.
> About what you said in the IE enable phase is dangerous,
> interrupts are always off during the cpu root-mode and guest-mode 
> switching in kvm_enter_guest and kvm_exc_entry.
>

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by lixianglai 1 month, 1 week ago
Hi Jinyang:
>
> On 2025-12-29 11:53, lixianglai wrote:
>> Hi Jinyang:
>>> On 2025-12-27 09:27, Xianglai Li wrote:
>>>
>>>> Insert the appropriate UNWIND macro definition into the 
>>>> kvm_exc_entry in
>>>> the assembly function to guide the generation of correct ORC table 
>>>> entries,
>>>> thereby solving the timeout problem of loading the livepatch-sample 
>>>> module
>>>> on a physical machine running multiple vcpus virtual machines.
>>>>
>>>> While solving the above problems, we have gained an additional 
>>>> benefit,
>>>> that is, we can obtain more call stack information
>>>>
>>>> Stack information that can be obtained before the problem is fixed:
>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>> [<0>] kvm_exc_entry+0x100/0x1e0
>>>>
>>>> Stack information that can be obtained after the problem is fixed:
>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>> [<0>] kvm_exc_entry+0x104/0x1e4
>>>> [<0>] kvm_enter_guest+0x38/0x11c
>>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>> [<0>] sys_ioctl+0x498/0xf00
>>>> [<0>] do_syscall+0x98/0x1d0
>>>> [<0>] handle_syscall+0xb8/0x158
>>>>
>>>> Cc: stable@vger.kernel.org
>>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>>>> ---
>>>> Cc: Huacai Chen <chenhuacai@kernel.org>
>>>> Cc: WANG Xuerui <kernel@xen0n.name>
>>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>>>> Cc: Bibo Mao <maobibo@loongson.cn>
>>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>>>> Cc: Xianglai Li <lixianglai@loongson.cn>
>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>>>
>>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>>>> index 93845ce53651..a3ea9567dbe5 100644
>>>> --- a/arch/loongarch/kvm/switch.S
>>>> +++ b/arch/loongarch/kvm/switch.S
>>>> @@ -10,6 +10,7 @@
>>>>   #include <asm/loongarch.h>
>>>>   #include <asm/regdef.h>
>>>>   #include <asm/unwind_hints.h>
>>>> +#include <linux/kvm_types.h>
>>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>> @@ -110,9 +111,9 @@
>>>>        * need to copy world switch code to DMW area.
>>>>        */
>>>>       .text
>>>> +    .p2align PAGE_SHIFT
>>>>       .cfi_sections    .debug_frame
>>>>   SYM_CODE_START(kvm_exc_entry)
>>>> -    .p2align PAGE_SHIFT
>>>>       UNWIND_HINT_UNDEFINED
>>>>       csrwr    a2,   KVM_TEMP_KS
>>>>       csrrd    a2,   KVM_VCPU_KS
>>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>>>       /* restore per cpu register */
>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>>       addi.d    sp, sp, -PT_SIZE
>>>> +    UNWIND_HINT_REGS
>>>>         /* Prepare handle exception */
>>>>       or    a0, s0, zero
>>>> @@ -200,7 +202,7 @@ ret_to_host:
>>>>       jr      ra
>>>>     SYM_CODE_END(kvm_exc_entry)
>>>> -EXPORT_SYMBOL(kvm_exc_entry)
>>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>>>     /*
>>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
>>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>>>       /* Save host GPRs */
>>>>       kvm_save_host_gpr a2
>>>>   +    /*
>>>> +     * The csr_era member variable of the pt_regs structure is 
>>>> required
>>>> +     * for unwinding orc to perform stack traceback, so we need to 
>>>> put
>>>> +     * pc into csr_era member variable here.
>>>> +     */
>>>> +    pcaddi    t0, 0
>>>> +    st.d    t0, a2, PT_ERA
>>> Hi, Xianglai,
>>>
>>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` rather 
>>> than
>>> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark "C-likely"
>>> asm functionw. 
>>
>> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the next 
>> version.
>>
>>> I guess the kvm_enter_guest is something like exception
>>> handler becuase the last instruction is "ertn". So usually it should
>>> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
>>> However, all info is store to "$a2", this mark should be
>>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
>>> I don't konw why save this function internal PC here by `pcaddi t0, 0`,
>>> and I think it is no meaning(, for exception handler, they save last PC
>>> by read CSR.ERA). The `kvm_enter_guest` saves registers by
>>> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
>>> is enable. So I wonder if there is really a stacktrace through this 
>>> function?
>>>
>> The stack backtracking issue in switch.S is rather complex because it 
>> involves the switching between cpu root-mode and guest-mode:
>> Real stack backtracking should be divided into two parts:
>> part 1:
>>     [<0>] kvm_enter_guest+0x38/0x11c
>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>     [<0>] sys_ioctl+0x498/0xf00
>>     [<0>] do_syscall+0x98/0x1d0
>>     [<0>] handle_syscall+0xb8/0x158
>>
>> part 2:
>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>
>>
>> In "part 1", after executing kvm_enter_guest, the cpu switches from 
>> root-mode to guest-mode.
>> In this case, stack backtracking is indeed very rare.
>>
>> In "part 2", the cpu switches from the guest-mode to the root-mode,
>> and most of the stack backtracking occurs during this phase.
>>
>> To obtain the longest call chain, we save pc in kvm_enter_guest to 
>> pt_regs.csr_era,
>> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
>> The ORC entry was re-established using "UNWIND_HINT_REGS",
>>  and then we obtained the following stack backtrace as we wanted:
>>
>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>     [<0>] kvm_exc_entry+0x104/0x1e4
> I found this might be a coincidence—correct behavior due to the incorrect
> UNWIND_HINT_REGS mark and unusual stack adjustment.
>
> First, the kvm_enter_guest contains only a single branch instruction, 
> ertn.
> It hardware-jump to the CSR.ERA address directly, jump into 
> kvm_exc_entry.
>
> At this point, the stack layout looks like this:
> -------------------------------
>   frame from call to `kvm_enter_guest`
> -------------------------------  <- $sp
>   PT_REGS
> -------------------------------  <- $a2
>
> Then kvm_exc_entry adjust stack without save any register (e.g. $ra, $sp)
> but still marked UNWIND_HINT_REGS.
> After the adjustment:
> -------------------------------
>   frame from call to `kvm_enter_guest`
> -------------------------------
>   PT_REGS
> -------------------------------  <- $a2, new $sp
>
> During unwinding, when the unwinder reaches kvm_exc_entry,
> it meets the mark of PT_REGS and correctly recovers
>  pc = regs.csr_era, sp = regs.sp, ra = regs.ra
>
Yes, here unwinder does work as you say.

> a) Can we avoid "ertn" rather than `jr reg (or jirl ra, reg, 0)`
> instead, like call?
No, we need to rely on the 'ertn' instruction to restore PIE to CRMD.IE,
and at the same time to ensure that it is atomic;
there should be no instruction more appropriate here than 'ertn'.

> The kvm_exc_entry cannot back to kvm_enter_guest
> if we use "ertn", so should the kvm_enter_guest appear on the stacktrace?
>

It is flexible. As I mentioned above, the cpu completes the switch from 
host-mode to guest mode through kvm_enter_guest,
and then the switch from guest mode to host-mode through kvm_exc_entry. 
When we ignore the details of the host-mode
and guest-mode switching in the middle, we can understand that the host 
cpu has completed kvm_enter_guest->kvm_exc_entry.
 From this perspective, I think it can exist in the call stack, and at 
the same time, we have obtained the maximum call stack information.


> b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
> UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
> like ret_from_kernel_thread_asm.
>
The following command can be used to dump the orc entries of the kernel:
./tools/objtool/objtool --dump vmlinux

You can observe that not all orc entries are generated at the beginning 
of the function.
For example:
handle_tlb_protect
ftrace_stub
handle_reserved

So, it seems unnecessary for us to move UNWIND_HINT_REGS so that it is
placed at the beginning of the function.

If you have a better solution, could you provide an example of the 
modification?
I can test the feasibility of the solution.

Thanks!
Xianglai.

>> [<0>] kvm_enter_guest+0x38/0x11c
>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>     [<0>] sys_ioctl+0x498/0xf00
>>     [<0>] do_syscall+0x98/0x1d0
>>     [<0>] handle_syscall+0xb8/0x158
>>
>> Doing so is equivalent to ignoring the details of the cpu root-mode 
>> and guest-mode switching.
>> About what you said in the IE enable phase is dangerous,
>> interrupts are always off during the cpu root-mode and guest-mode 
>> switching in kvm_enter_guest and kvm_exc_entry.
>>

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Jinyang He 1 month, 1 week ago
On 2025-12-29 18:11, lixianglai wrote:

> Hi Jinyang:
>>
>> On 2025-12-29 11:53, lixianglai wrote:
>>> Hi Jinyang:
>>>> On 2025-12-27 09:27, Xianglai Li wrote:
>>>>
>>>>> Insert the appropriate UNWIND macro definition into the 
>>>>> kvm_exc_entry in
>>>>> the assembly function to guide the generation of correct ORC table 
>>>>> entries,
>>>>> thereby solving the timeout problem of loading the 
>>>>> livepatch-sample module
>>>>> on a physical machine running multiple vcpus virtual machines.
>>>>>
>>>>> While solving the above problems, we have gained an additional 
>>>>> benefit,
>>>>> that is, we can obtain more call stack information
>>>>>
>>>>> Stack information that can be obtained before the problem is fixed:
>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>> [<0>] kvm_exc_entry+0x100/0x1e0
>>>>>
>>>>> Stack information that can be obtained after the problem is fixed:
>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>> [<0>] kvm_exc_entry+0x104/0x1e4
>>>>> [<0>] kvm_enter_guest+0x38/0x11c
>>>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>> [<0>] sys_ioctl+0x498/0xf00
>>>>> [<0>] do_syscall+0x98/0x1d0
>>>>> [<0>] handle_syscall+0xb8/0x158
>>>>>
>>>>> Cc: stable@vger.kernel.org
>>>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>>>>> ---
>>>>> Cc: Huacai Chen <chenhuacai@kernel.org>
>>>>> Cc: WANG Xuerui <kernel@xen0n.name>
>>>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>>>>> Cc: Bibo Mao <maobibo@loongson.cn>
>>>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>>>>> Cc: Xianglai Li <lixianglai@loongson.cn>
>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>>>>
>>>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>>>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>>>>
>>>>> diff --git a/arch/loongarch/kvm/switch.S 
>>>>> b/arch/loongarch/kvm/switch.S
>>>>> index 93845ce53651..a3ea9567dbe5 100644
>>>>> --- a/arch/loongarch/kvm/switch.S
>>>>> +++ b/arch/loongarch/kvm/switch.S
>>>>> @@ -10,6 +10,7 @@
>>>>>   #include <asm/loongarch.h>
>>>>>   #include <asm/regdef.h>
>>>>>   #include <asm/unwind_hints.h>
>>>>> +#include <linux/kvm_types.h>
>>>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>>> @@ -110,9 +111,9 @@
>>>>>        * need to copy world switch code to DMW area.
>>>>>        */
>>>>>       .text
>>>>> +    .p2align PAGE_SHIFT
>>>>>       .cfi_sections    .debug_frame
>>>>>   SYM_CODE_START(kvm_exc_entry)
>>>>> -    .p2align PAGE_SHIFT
>>>>>       UNWIND_HINT_UNDEFINED
>>>>>       csrwr    a2,   KVM_TEMP_KS
>>>>>       csrrd    a2,   KVM_VCPU_KS
>>>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>>>>       /* restore per cpu register */
>>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>>>       addi.d    sp, sp, -PT_SIZE
>>>>> +    UNWIND_HINT_REGS
>>>>>         /* Prepare handle exception */
>>>>>       or    a0, s0, zero
>>>>> @@ -200,7 +202,7 @@ ret_to_host:
>>>>>       jr      ra
>>>>>     SYM_CODE_END(kvm_exc_entry)
>>>>> -EXPORT_SYMBOL(kvm_exc_entry)
>>>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>>>>     /*
>>>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
>>>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>>>>       /* Save host GPRs */
>>>>>       kvm_save_host_gpr a2
>>>>>   +    /*
>>>>> +     * The csr_era member variable of the pt_regs structure is 
>>>>> required
>>>>> +     * for unwinding orc to perform stack traceback, so we need 
>>>>> to put
>>>>> +     * pc into csr_era member variable here.
>>>>> +     */
>>>>> +    pcaddi    t0, 0
>>>>> +    st.d    t0, a2, PT_ERA
>>>> Hi, Xianglai,
>>>>
>>>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` rather 
>>>> than
>>>> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark 
>>>> "C-likely"
>>>> asm functionw. 
>>>
>>> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the next 
>>> version.
>>>
>>>> I guess the kvm_enter_guest is something like exception
>>>> handler becuase the last instruction is "ertn". So usually it should
>>>> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
>>>> However, all info is store to "$a2", this mark should be
>>>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
>>>> I don't konw why save this function internal PC here by `pcaddi t0, 
>>>> 0`,
>>>> and I think it is no meaning(, for exception handler, they save 
>>>> last PC
>>>> by read CSR.ERA). The `kvm_enter_guest` saves registers by
>>>> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
>>>> is enable. So I wonder if there is really a stacktrace through this 
>>>> function?
>>>>
>>> The stack backtracking issue in switch.S is rather complex because 
>>> it involves the switching between cpu root-mode and guest-mode:
>>> Real stack backtracking should be divided into two parts:
>>> part 1:
>>>     [<0>] kvm_enter_guest+0x38/0x11c
>>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>     [<0>] sys_ioctl+0x498/0xf00
>>>     [<0>] do_syscall+0x98/0x1d0
>>>     [<0>] handle_syscall+0xb8/0x158
>>>
>>> part 2:
>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>
>>>
>>> In "part 1", after executing kvm_enter_guest, the cpu switches from 
>>> root-mode to guest-mode.
>>> In this case, stack backtracking is indeed very rare.
>>>
>>> In "part 2", the cpu switches from the guest-mode to the root-mode,
>>> and most of the stack backtracking occurs during this phase.
>>>
>>> To obtain the longest call chain, we save pc in kvm_enter_guest to 
>>> pt_regs.csr_era,
>>> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
>>> The ORC entry was re-established using "UNWIND_HINT_REGS",
>>>  and then we obtained the following stack backtrace as we wanted:
>>>
>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>> I found this might be a coincidence—correct behavior due to the 
>> incorrect
>> UNWIND_HINT_REGS mark and unusual stack adjustment.
>>
>> First, the kvm_enter_guest contains only a single branch instruction, 
>> ertn.
>> It hardware-jump to the CSR.ERA address directly, jump into 
>> kvm_exc_entry.
>>
>> At this point, the stack layout looks like this:
>> -------------------------------
>>   frame from call to `kvm_enter_guest`
>> -------------------------------  <- $sp
>>   PT_REGS
>> -------------------------------  <- $a2
>>
>> Then kvm_exc_entry adjust stack without save any register (e.g. $ra, 
>> $sp)
>> but still marked UNWIND_HINT_REGS.
>> After the adjustment:
>> -------------------------------
>>   frame from call to `kvm_enter_guest`
>> -------------------------------
>>   PT_REGS
>> -------------------------------  <- $a2, new $sp
>>
>> During unwinding, when the unwinder reaches kvm_exc_entry,
>> it meets the mark of PT_REGS and correctly recovers
>>  pc = regs.csr_era, sp = regs.sp, ra = regs.ra
>>
> Yes, here unwinder does work as you say.
>
>> a) Can we avoid "ertn" rather than `jr reg (or jirl ra, reg, 0)`
>> instead, like call?
> No,  we need to rely on the 'ertn instruction return PIE to CRMD IE,
> at the same time to ensure that its atomic,
> there should be no other instruction than' ertn 'more appropriate here.
You are right! I got it.
>
>> The kvm_exc_entry cannot back to kvm_enter_guest
>> if we use "ertn", so should the kvm_enter_guest appear on the 
>> stacktrace?
>>
>
> It is flexible. As I mentioned above, the cpu completes the switch 
> from host-mode to guest mode through kvm_enter_guest,
> and then the switch from guest mode to host-mode through 
> kvm_exc_entry. When we ignore the details of the host-mode
> and guest-mode switching in the middle, we can understand that the 
> host cpu has completed kvm_enter_guest->kvm_exc_entry.
> From this perspective, I think it can exist in the call stack, and at 
> the same time, we have obtained the maximum call stack information.
>
>
>> b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
>> UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
>> like ret_from_kernel_thread_asm.
>>
> The following command can be used to dump the orc entries of the kernel:
> ./tools/objtool/objtool --dump vmlinux
>
> You can observe that not all orc entries are generated at the 
> beginning of the function.
> For example:
> handle_tlb_protect
> ftrace_stub
> handle_reserved
>
> So, is it unnecessary for us to modify UNWIND_HINT_REGS in order to 
> place it at the beginning of the function.
>
> If you have a better solution, could you provide an example of the 
> modification?
> I can test the feasibility of the solution.
>
My wording about "the beginning of the function" was incorrect (sorry).
The hint should be marked where all the stacktrace info is available.
Thanks for all the explanation; since I'm unfamiliar with KVM, I needed
these details to help my understanding.

Can you try the following: save the regs via $sp, set a more precise era
in pt_regs, and add more unwind hints.


diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
index f1768b7a6194..8ed1d7b72c54 100644
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -14,13 +14,13 @@
  #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)

  .macro kvm_save_host_gpr base
-    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
+    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
      st.d    $r\n, \base, HGPR_OFFSET(\n)
      .endr
  .endm

  .macro kvm_restore_host_gpr base
-    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
+    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
      ld.d    $r\n, \base, HGPR_OFFSET(\n)
      .endr
  .endm
@@ -88,6 +88,7 @@
      /* Load KVM_ARCH register */
      ld.d    a2, a2,    (KVM_ARCH_GGPR + 8 * REG_A2)

+111:
      ertn /* Switch to guest: GSTAT.PGM = 1, ERRCTL.ISERR = 0, 
TLBRPRMD.ISTLBR = 0 */
  .endm

@@ -158,9 +159,10 @@ SYM_CODE_START(kvm_exc_entry)
      csrwr    t0, LOONGARCH_CSR_GTLBC
      ld.d    tp, a2, KVM_ARCH_HTP
      ld.d    sp, a2, KVM_ARCH_HSP
+    UNWIND_HINT_REGS
+
      /* restore per cpu register */
      ld.d    u0, a2, KVM_ARCH_HPERCPU
-    addi.d    sp, sp, -PT_SIZE

      /* Prepare handle exception */
      or    a0, s0, zero
@@ -184,10 +186,11 @@ SYM_CODE_START(kvm_exc_entry)
      csrwr    s1, KVM_VCPU_KS
      kvm_switch_to_guest

+    UNWIND_HINT_UNDEFINED
  ret_to_host:
-    ld.d    a2, a2, KVM_ARCH_HSP
-    addi.d  a2, a2, -PT_SIZE
-    kvm_restore_host_gpr    a2
+    ld.d    sp, a2, KVM_ARCH_HSP
+    kvm_restore_host_gpr    sp
+    addi.d    sp, sp, PT_SIZE
      jr      ra

  SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL)
@@ -200,11 +203,15 @@ SYM_CODE_END(kvm_exc_entry)
   *  a0: kvm_run* run
   *  a1: kvm_vcpu* vcpu
   */
-SYM_FUNC_START(kvm_enter_guest)
+SYM_CODE_START(kvm_enter_guest)
+    UNWIND_HINT_UNDEFINED
      /* Allocate space in stack bottom */
-    addi.d    a2, sp, -PT_SIZE
+    addi.d    sp, sp, -PT_SIZE
      /* Save host GPRs */
-    kvm_save_host_gpr a2
+    kvm_save_host_gpr sp
+    la.pcrel a2, 111f
+    st.d     a2, sp, PT_ERA
+    UNWIND_HINT_REGS

      addi.d    a2, a1, KVM_VCPU_ARCH
      st.d    sp, a2, KVM_ARCH_HSP

Jinyang

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Bibo Mao 1 month, 1 week ago

On 2025/12/29 下午6:41, Jinyang He wrote:
> On 2025-12-29 18:11, lixianglai wrote:
> 
>> Hi Jinyang:
>>>
>>> On 2025-12-29 11:53, lixianglai wrote:
>>>> Hi Jinyang:
>>>>> On 2025-12-27 09:27, Xianglai Li wrote:
>>>>>
>>>>>> Insert the appropriate UNWIND macro definition into the 
>>>>>> kvm_exc_entry in
>>>>>> the assembly function to guide the generation of correct ORC table 
>>>>>> entries,
>>>>>> thereby solving the timeout problem of loading the 
>>>>>> livepatch-sample module
>>>>>> on a physical machine running multiple vcpus virtual machines.
>>>>>>
>>>>>> While solving the above problems, we have gained an additional 
>>>>>> benefit,
>>>>>> that is, we can obtain more call stack information
>>>>>>
>>>>>> Stack information that can be obtained before the problem is fixed:
>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>> [<0>] kvm_exc_entry+0x100/0x1e0
>>>>>>
>>>>>> Stack information that can be obtained after the problem is fixed:
>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>> [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>> [<0>] kvm_enter_guest+0x38/0x11c
>>>>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>>> [<0>] sys_ioctl+0x498/0xf00
>>>>>> [<0>] do_syscall+0x98/0x1d0
>>>>>> [<0>] handle_syscall+0xb8/0x158
>>>>>>
>>>>>> Cc: stable@vger.kernel.org
>>>>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>>>>>> ---
>>>>>> Cc: Huacai Chen <chenhuacai@kernel.org>
>>>>>> Cc: WANG Xuerui <kernel@xen0n.name>
>>>>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>>>>>> Cc: Bibo Mao <maobibo@loongson.cn>
>>>>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>>>>>> Cc: Xianglai Li <lixianglai@loongson.cn>
>>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>>>>>
>>>>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>>>>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/loongarch/kvm/switch.S 
>>>>>> b/arch/loongarch/kvm/switch.S
>>>>>> index 93845ce53651..a3ea9567dbe5 100644
>>>>>> --- a/arch/loongarch/kvm/switch.S
>>>>>> +++ b/arch/loongarch/kvm/switch.S
>>>>>> @@ -10,6 +10,7 @@
>>>>>>   #include <asm/loongarch.h>
>>>>>>   #include <asm/regdef.h>
>>>>>>   #include <asm/unwind_hints.h>
>>>>>> +#include <linux/kvm_types.h>
>>>>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>>>> @@ -110,9 +111,9 @@
>>>>>>        * need to copy world switch code to DMW area.
>>>>>>        */
>>>>>>       .text
>>>>>> +    .p2align PAGE_SHIFT
>>>>>>       .cfi_sections    .debug_frame
>>>>>>   SYM_CODE_START(kvm_exc_entry)
>>>>>> -    .p2align PAGE_SHIFT
>>>>>>       UNWIND_HINT_UNDEFINED
>>>>>>       csrwr    a2,   KVM_TEMP_KS
>>>>>>       csrrd    a2,   KVM_VCPU_KS
>>>>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>>>>>       /* restore per cpu register */
>>>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>>>>       addi.d    sp, sp, -PT_SIZE
>>>>>> +    UNWIND_HINT_REGS
>>>>>>         /* Prepare handle exception */
>>>>>>       or    a0, s0, zero
>>>>>> @@ -200,7 +202,7 @@ ret_to_host:
>>>>>>       jr      ra
>>>>>>     SYM_CODE_END(kvm_exc_entry)
>>>>>> -EXPORT_SYMBOL(kvm_exc_entry)
>>>>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>>>>>     /*
>>>>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
>>>>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>>>>>       /* Save host GPRs */
>>>>>>       kvm_save_host_gpr a2
>>>>>>   +    /*
>>>>>> +     * The csr_era member variable of the pt_regs structure is 
>>>>>> required
>>>>>> +     * for unwinding orc to perform stack traceback, so we need 
>>>>>> to put
>>>>>> +     * pc into csr_era member variable here.
>>>>>> +     */
>>>>>> +    pcaddi    t0, 0
>>>>>> +    st.d    t0, a2, PT_ERA
>>>>> Hi, Xianglai,
>>>>>
>>>>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` rather 
>>>>> than
>>>>> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark 
>>>>> "C-likely"
>>>>> asm functionw. 
>>>>
>>>> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the next 
>>>> version.
>>>>
>>>>> I guess the kvm_enter_guest is something like exception
>>>>> handler becuase the last instruction is "ertn". So usually it should
>>>>> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
>>>>> However, all info is store to "$a2", this mark should be
>>>>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
>>>>> I don't konw why save this function internal PC here by `pcaddi t0, 
>>>>> 0`,
>>>>> and I think it is no meaning(, for exception handler, they save 
>>>>> last PC
>>>>> by read CSR.ERA). The `kvm_enter_guest` saves registers by
>>>>> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
>>>>> is enable. So I wonder if there is really a stacktrace through this 
>>>>> function?
>>>>>
>>>> The stack backtracking issue in switch.S is rather complex because 
>>>> it involves the switching between cpu root-mode and guest-mode:
>>>> Real stack backtracking should be divided into two parts:
>>>> part 1:
>>>>     [<0>] kvm_enter_guest+0x38/0x11c
>>>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>     [<0>] sys_ioctl+0x498/0xf00
>>>>     [<0>] do_syscall+0x98/0x1d0
>>>>     [<0>] handle_syscall+0xb8/0x158
>>>>
>>>> part 2:
>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>>
>>>>
>>>> In "part 1", after executing kvm_enter_guest, the cpu switches from 
>>>> root-mode to guest-mode.
>>>> In this case, stack backtracking is indeed very rare.
>>>>
>>>> In "part 2", the cpu switches from the guest-mode to the root-mode,
>>>> and most of the stack backtracking occurs during this phase.
>>>>
>>>> To obtain the longest call chain, we save pc in kvm_enter_guest to 
>>>> pt_regs.csr_era,
>>>> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
>>>> The ORC entry was re-established using "UNWIND_HINT_REGS",
>>>>  and then we obtained the following stack backtrace as we wanted:
>>>>
>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>> I found this might be a coincidence—correct behavior due to the 
>>> incorrect
>>> UNWIND_HINT_REGS mark and unusual stack adjustment.
>>>
>>> First, the kvm_enter_guest contains only a single branch instruction, 
>>> ertn.
>>> It hardware-jump to the CSR.ERA address directly, jump into 
>>> kvm_exc_entry.
>>>
>>> At this point, the stack layout looks like this:
>>> -------------------------------
>>>   frame from call to `kvm_enter_guest`
>>> -------------------------------  <- $sp
>>>   PT_REGS
>>> -------------------------------  <- $a2
>>>
>>> Then kvm_exc_entry adjust stack without save any register (e.g. $ra, 
>>> $sp)
>>> but still marked UNWIND_HINT_REGS.
>>> After the adjustment:
>>> -------------------------------
>>>   frame from call to `kvm_enter_guest`
>>> -------------------------------
>>>   PT_REGS
>>> -------------------------------  <- $a2, new $sp
>>>
>>> During unwinding, when the unwinder reaches kvm_exc_entry,
>>> it meets the mark of PT_REGS and correctly recovers
>>>  pc = regs.csr_era, sp = regs.sp, ra = regs.ra
>>>
>> Yes, here unwinder does work as you say.
>>
>>> a) Can we avoid "ertn" rather than `jr reg (or jirl ra, reg, 0)`
>>> instead, like call?
>> No,  we need to rely on the 'ertn instruction return PIE to CRMD IE,
>> at the same time to ensure that its atomic,
>> there should be no other instruction than' ertn 'more appropriate here.
> You are right! I got it.
>>
>>> The kvm_exc_entry cannot back to kvm_enter_guest
>>> if we use "ertn", so should the kvm_enter_guest appear on the 
>>> stacktrace?
>>>
>>
>> It is flexible. As I mentioned above, the cpu completes the switch 
>> from host-mode to guest mode through kvm_enter_guest,
>> and then the switch from guest mode to host-mode through 
>> kvm_exc_entry. When we ignore the details of the host-mode
>> and guest-mode switching in the middle, we can understand that the 
>> host cpu has completed kvm_enter_guest->kvm_exc_entry.
>> From this perspective, I think it can exist in the call stack, and at 
>> the same time, we have obtained the maximum call stack information.
>>
>>
>>> b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
>>> UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
>>> like ret_from_kernel_thread_asm.
>>>
>> The following command can be used to dump the orc entries of the kernel:
>> ./tools/objtool/objtool --dump vmlinux
>>
>> You can observe that not all orc entries are generated at the 
>> beginning of the function.
>> For example:
>> handle_tlb_protect
>> ftrace_stub
>> handle_reserved
>>
>> So, is it unnecessary for us to modify UNWIND_HINT_REGS in order to 
>> place it at the beginning of the function.
>>
>> If you have a better solution, could you provide an example of the 
>> modification?
>> I can test the feasibility of the solution.
>>
> The expression at the beginning of the function is incorrect (feeling 
> sorry).
> It should be marked where have all stacktrace info.
> Thanks for all the explaining, since I'm unfamiliar with kvm, I need 
> these to help my understanding.
> 
> Can you try with follows, with save regs by $sp, set more precise era to 
> pt_regs, and more unwind hint.
> 
> 
> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
> index f1768b7a6194..8ed1d7b72c54 100644
> --- a/arch/loongarch/kvm/switch.S
> +++ b/arch/loongarch/kvm/switch.S
> @@ -14,13 +14,13 @@
>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
> 
>   .macro kvm_save_host_gpr base
> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>       st.d    $r\n, \base, HGPR_OFFSET(\n)
>       .endr
>   .endm
> 
>   .macro kvm_restore_host_gpr base
> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>       ld.d    $r\n, \base, HGPR_OFFSET(\n)
>       .endr
>   .endm
> @@ -88,6 +88,7 @@
>       /* Load KVM_ARCH register */
>       ld.d    a2, a2,    (KVM_ARCH_GGPR + 8 * REG_A2)
> 
> +111:
>       ertn /* Switch to guest: GSTAT.PGM = 1, ERRCTL.ISERR = 0, 
> TLBRPRMD.ISTLBR = 0 */
>   .endm
> 
> @@ -158,9 +159,10 @@ SYM_CODE_START(kvm_exc_entry)
>       csrwr    t0, LOONGARCH_CSR_GTLBC
>       ld.d    tp, a2, KVM_ARCH_HTP
>       ld.d    sp, a2, KVM_ARCH_HSP
> +    UNWIND_HINT_REGS
> +
>       /* restore per cpu register */
>       ld.d    u0, a2, KVM_ARCH_HPERCPU
> -    addi.d    sp, sp, -PT_SIZE
> 
>       /* Prepare handle exception */
>       or    a0, s0, zero
> @@ -184,10 +186,11 @@ SYM_CODE_START(kvm_exc_entry)
>       csrwr    s1, KVM_VCPU_KS
>       kvm_switch_to_guest
> 
> +    UNWIND_HINT_UNDEFINED
>   ret_to_host:
> -    ld.d    a2, a2, KVM_ARCH_HSP
> -    addi.d  a2, a2, -PT_SIZE
> -    kvm_restore_host_gpr    a2
> +    ld.d    sp, a2, KVM_ARCH_HSP
> +    kvm_restore_host_gpr    sp
> +    addi.d    sp, sp, PT_SIZE
>       jr      ra
> 
>   SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL)
> @@ -200,11 +203,15 @@ SYM_CODE_END(kvm_exc_entry)
>    *  a0: kvm_run* run
>    *  a1: kvm_vcpu* vcpu
>    */
> -SYM_FUNC_START(kvm_enter_guest)
> +SYM_CODE_START(kvm_enter_guest)
> +    UNWIND_HINT_UNDEFINED
>       /* Allocate space in stack bottom */
> -    addi.d    a2, sp, -PT_SIZE
> +    addi.d    sp, sp, -PT_SIZE
>       /* Save host GPRs */
> -    kvm_save_host_gpr a2
> +    kvm_save_host_gpr sp
> +    la.pcrel a2, 111f
> +    st.d     a2, sp, PT_ERA
> +    UNWIND_HINT_REGS
> 
Why is the label 111f more accurate? Supposing there is a hw breakpoint 
here and backtrace is called, what is the call trace stack then? Obviously 
label 111f has not been executed at that point.

UNWIND_HINT_REGS is used for nested kernel stacks, is that right?
With nested interrupt and exception handlers on the LoongArch kernel, is 
UNWIND_HINT_REGS used?

SYM_CODE_START(ret_from_fork_asm)
         UNWIND_HINT_REGS
         move            a1, sp
         bl              ret_from_fork
         STACKLEAK_ERASE
         RESTORE_STATIC
         RESTORE_SOME
         RESTORE_SP_AND_RET
SYM_CODE_END(ret_from_fork_asm)
With this piece of code, what are the contents of pt_regs? In general it is 
called from sys_clone, and era is the user PC address, is that right? If so,
what is the detailed usage at the beginning of ret_from_fork_asm?

Regards
Bibo Mao

>       addi.d    a2, a1, KVM_VCPU_ARCH
>       st.d    sp, a2, KVM_ARCH_HSP
> 
> Jinyang
> 

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Jinyang He 1 month, 1 week ago
On 2025-12-30 10:24, Bibo Mao wrote:

>
>
> On 2025/12/29 下午6:41, Jinyang He wrote:
>> On 2025-12-29 18:11, lixianglai wrote:
>>
>>> Hi Jinyang:
>>>>
>>>> On 2025-12-29 11:53, lixianglai wrote:
>>>>> Hi Jinyang:
>>>>>> On 2025-12-27 09:27, Xianglai Li wrote:
>>>>>>
>>>>>>> Insert the appropriate UNWIND macro definition into the 
>>>>>>> kvm_exc_entry in
>>>>>>> the assembly function to guide the generation of correct ORC 
>>>>>>> table entries,
>>>>>>> thereby solving the timeout problem of loading the 
>>>>>>> livepatch-sample module
>>>>>>> on a physical machine running multiple vcpus virtual machines.
>>>>>>>
>>>>>>> While solving the above problems, we have gained an additional 
>>>>>>> benefit,
>>>>>>> that is, we can obtain more call stack information
>>>>>>>
>>>>>>> Stack information that can be obtained before the problem is fixed:
>>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>> [<0>] kvm_exc_entry+0x100/0x1e0
>>>>>>>
>>>>>>> Stack information that can be obtained after the problem is fixed:
>>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>> [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>>> [<0>] kvm_enter_guest+0x38/0x11c
>>>>>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>>>> [<0>] sys_ioctl+0x498/0xf00
>>>>>>> [<0>] do_syscall+0x98/0x1d0
>>>>>>> [<0>] handle_syscall+0xb8/0x158
>>>>>>>
>>>>>>> Cc: stable@vger.kernel.org
>>>>>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>>>>>>> ---
>>>>>>> Cc: Huacai Chen <chenhuacai@kernel.org>
>>>>>>> Cc: WANG Xuerui <kernel@xen0n.name>
>>>>>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>>>>>>> Cc: Bibo Mao <maobibo@loongson.cn>
>>>>>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>>>>>>> Cc: Xianglai Li <lixianglai@loongson.cn>
>>>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>>>>>>
>>>>>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>>>>>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/loongarch/kvm/switch.S 
>>>>>>> b/arch/loongarch/kvm/switch.S
>>>>>>> index 93845ce53651..a3ea9567dbe5 100644
>>>>>>> --- a/arch/loongarch/kvm/switch.S
>>>>>>> +++ b/arch/loongarch/kvm/switch.S
>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>   #include <asm/loongarch.h>
>>>>>>>   #include <asm/regdef.h>
>>>>>>>   #include <asm/unwind_hints.h>
>>>>>>> +#include <linux/kvm_types.h>
>>>>>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>>>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>>>>> @@ -110,9 +111,9 @@
>>>>>>>        * need to copy world switch code to DMW area.
>>>>>>>        */
>>>>>>>       .text
>>>>>>> +    .p2align PAGE_SHIFT
>>>>>>>       .cfi_sections    .debug_frame
>>>>>>>   SYM_CODE_START(kvm_exc_entry)
>>>>>>> -    .p2align PAGE_SHIFT
>>>>>>>       UNWIND_HINT_UNDEFINED
>>>>>>>       csrwr    a2,   KVM_TEMP_KS
>>>>>>>       csrrd    a2,   KVM_VCPU_KS
>>>>>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>>>>>>       /* restore per cpu register */
>>>>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>>>>>       addi.d    sp, sp, -PT_SIZE
>>>>>>> +    UNWIND_HINT_REGS
>>>>>>>         /* Prepare handle exception */
>>>>>>>       or    a0, s0, zero
>>>>>>> @@ -200,7 +202,7 @@ ret_to_host:
>>>>>>>       jr      ra
>>>>>>>     SYM_CODE_END(kvm_exc_entry)
>>>>>>> -EXPORT_SYMBOL(kvm_exc_entry)
>>>>>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>>>>>>     /*
>>>>>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu 
>>>>>>> *vcpu)
>>>>>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>>>>>>       /* Save host GPRs */
>>>>>>>       kvm_save_host_gpr a2
>>>>>>>   +    /*
>>>>>>> +     * The csr_era member variable of the pt_regs structure is 
>>>>>>> required
>>>>>>> +     * for unwinding orc to perform stack traceback, so we need 
>>>>>>> to put
>>>>>>> +     * pc into csr_era member variable here.
>>>>>>> +     */
>>>>>>> +    pcaddi    t0, 0
>>>>>>> +    st.d    t0, a2, PT_ERA
>>>>>> Hi, Xianglai,
>>>>>>
>>>>>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` 
>>>>>> rather than
>>>>>> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark 
>>>>>> "C-likely"
>>>>>> asm functionw. 
>>>>>
>>>>> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the next 
>>>>> version.
>>>>>
>>>>>> I guess the kvm_enter_guest is something like exception
>>>>>> handler becuase the last instruction is "ertn". So usually it should
>>>>>> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
>>>>>> However, all info is store to "$a2", this mark should be
>>>>>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
>>>>>> I don't konw why save this function internal PC here by `pcaddi 
>>>>>> t0, 0`,
>>>>>> and I think it is no meaning(, for exception handler, they save 
>>>>>> last PC
>>>>>> by read CSR.ERA). The `kvm_enter_guest` saves registers by
>>>>>> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
>>>>>> is enable. So I wonder if there is really a stacktrace through 
>>>>>> this function?
>>>>>>
>>>>> The stack backtracking issue in switch.S is rather complex because 
>>>>> it involves the switching between cpu root-mode and guest-mode:
>>>>> Real stack backtracking should be divided into two parts:
>>>>> part 1:
>>>>>     [<0>] kvm_enter_guest+0x38/0x11c
>>>>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>>     [<0>] sys_ioctl+0x498/0xf00
>>>>>     [<0>] do_syscall+0x98/0x1d0
>>>>>     [<0>] handle_syscall+0xb8/0x158
>>>>>
>>>>> part 2:
>>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>
>>>>>
>>>>> In "part 1", after executing kvm_enter_guest, the cpu switches 
>>>>> from root-mode to guest-mode.
>>>>> In this case, stack backtracking is indeed very rare.
>>>>>
>>>>> In "part 2", the cpu switches from the guest-mode to the root-mode,
>>>>> and most of the stack backtracking occurs during this phase.
>>>>>
>>>>> To obtain the longest call chain, we save pc in kvm_enter_guest to 
>>>>> pt_regs.csr_era,
>>>>> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
>>>>> The ORC entry was re-established using "UNWIND_HINT_REGS",
>>>>>  and then we obtained the following stack backtrace as we wanted:
>>>>>
>>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>> I found this might be a coincidence—correct behavior due to the 
>>>> incorrect
>>>> UNWIND_HINT_REGS mark and unusual stack adjustment.
>>>>
>>>> First, the kvm_enter_guest contains only a single branch 
>>>> instruction, ertn.
>>>> It hardware-jump to the CSR.ERA address directly, jump into 
>>>> kvm_exc_entry.
>>>>
>>>> At this point, the stack layout looks like this:
>>>> -------------------------------
>>>>   frame from call to `kvm_enter_guest`
>>>> -------------------------------  <- $sp
>>>>   PT_REGS
>>>> -------------------------------  <- $a2
>>>>
>>>> Then kvm_exc_entry adjust stack without save any register (e.g. 
>>>> $ra, $sp)
>>>> but still marked UNWIND_HINT_REGS.
>>>> After the adjustment:
>>>> -------------------------------
>>>>   frame from call to `kvm_enter_guest`
>>>> -------------------------------
>>>>   PT_REGS
>>>> -------------------------------  <- $a2, new $sp
>>>>
>>>> During unwinding, when the unwinder reaches kvm_exc_entry,
>>>> it meets the mark of PT_REGS and correctly recovers
>>>>  pc = regs.csr_era, sp = regs.sp, ra = regs.ra
>>>>
>>> Yes, here unwinder does work as you say.
>>>
>>>> a) Can we avoid "ertn" rather than `jr reg (or jirl ra, reg, 0)`
>>>> instead, like call?
>>> No,  we need to rely on the 'ertn instruction return PIE to CRMD IE,
>>> at the same time to ensure that its atomic,
>>> there should be no other instruction than' ertn 'more appropriate here.
>> You are right! I got it.
>>>
>>>> The kvm_exc_entry cannot back to kvm_enter_guest
>>>> if we use "ertn", so should the kvm_enter_guest appear on the 
>>>> stacktrace?
>>>>
>>>
>>> It is flexible. As I mentioned above, the cpu completes the switch 
>>> from host-mode to guest mode through kvm_enter_guest,
>>> and then the switch from guest mode to host-mode through 
>>> kvm_exc_entry. When we ignore the details of the host-mode
>>> and guest-mode switching in the middle, we can understand that the 
>>> host cpu has completed kvm_enter_guest->kvm_exc_entry.
>>> From this perspective, I think it can exist in the call stack, and 
>>> at the same time, we have obtained the maximum call stack information.
>>>
>>>
>>>> b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
>>>> UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
>>>> like ret_from_kernel_thread_asm.
>>>>
>>> The following command can be used to dump the orc entries of the 
>>> kernel:
>>> ./tools/objtool/objtool --dump vmlinux
>>>
>>> You can observe that not all orc entries are generated at the 
>>> beginning of the function.
>>> For example:
>>> handle_tlb_protect
>>> ftrace_stub
>>> handle_reserved
>>>
>>> So, is it unnecessary for us to modify UNWIND_HINT_REGS in order to 
>>> place it at the beginning of the function.
>>>
>>> If you have a better solution, could you provide an example of the 
>>> modification?
>>> I can test the feasibility of the solution.
>>>
>> The expression at the beginning of the function is incorrect (feeling 
>> sorry).
>> It should be marked where have all stacktrace info.
>> Thanks for all the explaining, since I'm unfamiliar with kvm, I need 
>> these to help my understanding.
>>
>> Can you try with follows, with save regs by $sp, set more precise era 
>> to pt_regs, and more unwind hint.
>>
>>
>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>> index f1768b7a6194..8ed1d7b72c54 100644
>> --- a/arch/loongarch/kvm/switch.S
>> +++ b/arch/loongarch/kvm/switch.S
>> @@ -14,13 +14,13 @@
>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>
>>   .macro kvm_save_host_gpr base
>> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
>> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>>       st.d    $r\n, \base, HGPR_OFFSET(\n)
>>       .endr
>>   .endm
>>
>>   .macro kvm_restore_host_gpr base
>> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
>> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>>       ld.d    $r\n, \base, HGPR_OFFSET(\n)
>>       .endr
>>   .endm
>> @@ -88,6 +88,7 @@
>>       /* Load KVM_ARCH register */
>>       ld.d    a2, a2,    (KVM_ARCH_GGPR + 8 * REG_A2)
>>
>> +111:
>>       ertn /* Switch to guest: GSTAT.PGM = 1, ERRCTL.ISERR = 0, 
>> TLBRPRMD.ISTLBR = 0 */
>>   .endm
>>
>> @@ -158,9 +159,10 @@ SYM_CODE_START(kvm_exc_entry)
>>       csrwr    t0, LOONGARCH_CSR_GTLBC
>>       ld.d    tp, a2, KVM_ARCH_HTP
>>       ld.d    sp, a2, KVM_ARCH_HSP
>> +    UNWIND_HINT_REGS
>> +
>>       /* restore per cpu register */
>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>> -    addi.d    sp, sp, -PT_SIZE
>>
>>       /* Prepare handle exception */
>>       or    a0, s0, zero
>> @@ -184,10 +186,11 @@ SYM_CODE_START(kvm_exc_entry)
>>       csrwr    s1, KVM_VCPU_KS
>>       kvm_switch_to_guest
>>
>> +    UNWIND_HINT_UNDEFINED
>>   ret_to_host:
>> -    ld.d    a2, a2, KVM_ARCH_HSP
>> -    addi.d  a2, a2, -PT_SIZE
>> -    kvm_restore_host_gpr    a2
>> +    ld.d    sp, a2, KVM_ARCH_HSP
>> +    kvm_restore_host_gpr    sp
>> +    addi.d    sp, sp, PT_SIZE
>>       jr      ra
>>
>>   SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL)
>> @@ -200,11 +203,15 @@ SYM_CODE_END(kvm_exc_entry)
>>    *  a0: kvm_run* run
>>    *  a1: kvm_vcpu* vcpu
>>    */
>> -SYM_FUNC_START(kvm_enter_guest)
>> +SYM_CODE_START(kvm_enter_guest)
>> +    UNWIND_HINT_UNDEFINED
>>       /* Allocate space in stack bottom */
>> -    addi.d    a2, sp, -PT_SIZE
>> +    addi.d    sp, sp, -PT_SIZE
>>       /* Save host GPRs */
>> -    kvm_save_host_gpr a2
>> +    kvm_save_host_gpr sp
>> +    la.pcrel a2, 111f
>> +    st.d     a2, sp, PT_ERA
>> +    UNWIND_HINT_REGS
>>
> why the label 111f is more accurate?  Supposing there is hw breakpoint 
> here and backtrace is called, what is the call trace stack then? 
> obvious label 111f is not executed instead.
Xianglai said marking it as regs can get more stack info, so I marked it
with UNWIND_HINT_REGS here, even though that label is not actually
executed. Removing UNWIND_HINT_REGS would then forbid unwinding from here.
This function is called and so should usually be marked as a "call",
but it is complicated by the stack switching and by using `ertn` to
transfer control to another function.


>
> UNWIND_HINT_REGS is used for nested kernel stack, is that right?
> With nested interrupt and exception handlers on LoongArch kernel, is 
> UNWIND_HINT_REGS used?
>
> SYM_CODE_START(ret_from_fork_asm)
>         UNWIND_HINT_REGS
>         move            a1, sp
>         bl              ret_from_fork
>         STACKLEAK_ERASE
>         RESTORE_STATIC
>         RESTORE_SOME
>         RESTORE_SP_AND_RET
> SYM_CODE_END(ret_from_fork_asm)
> With this piece of code, what is contents of pt_regs? In generic it is 
> called from sys_clone, era is user PC address, is that right? If so,
> what is detailed usage in the beginning of ret_from_fork_asm?
The stacktrace shows the control flow where the PC will go back, so
it is correct: when the PC is in ret_from_fork_asm, it is already
another thread. The era means it will go back to user mode.

Jinyang
>
> Regards
> Bibo Mao
>
>>       addi.d    a2, a1, KVM_VCPU_ARCH
>>       st.d    sp, a2, KVM_ARCH_HSP
>>
>> Jinyang
>>

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Bibo Mao 1 month, 1 week ago

On 2025/12/30 上午11:36, Jinyang He wrote:
> On 2025-12-30 10:24, Bibo Mao wrote:
> 
>>
>>
>> On 2025/12/29 下午6:41, Jinyang He wrote:
>>> On 2025-12-29 18:11, lixianglai wrote:
>>>
>>>> Hi Jinyang:
>>>>>
>>>>> On 2025-12-29 11:53, lixianglai wrote:
>>>>>> Hi Jinyang:
>>>>>>> On 2025-12-27 09:27, Xianglai Li wrote:
>>>>>>>
>>>>>>>> Insert the appropriate UNWIND macro definition into the 
>>>>>>>> kvm_exc_entry in
>>>>>>>> the assembly function to guide the generation of correct ORC 
>>>>>>>> table entries,
>>>>>>>> thereby solving the timeout problem of loading the 
>>>>>>>> livepatch-sample module
>>>>>>>> on a physical machine running multiple vcpus virtual machines.
>>>>>>>>
>>>>>>>> While solving the above problems, we have gained an additional 
>>>>>>>> benefit,
>>>>>>>> that is, we can obtain more call stack information
>>>>>>>>
>>>>>>>> Stack information that can be obtained before the problem is fixed:
>>>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>>> [<0>] kvm_exc_entry+0x100/0x1e0
>>>>>>>>
>>>>>>>> Stack information that can be obtained after the problem is fixed:
>>>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>>> [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>>>> [<0>] kvm_enter_guest+0x38/0x11c
>>>>>>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>>>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>>>>> [<0>] sys_ioctl+0x498/0xf00
>>>>>>>> [<0>] do_syscall+0x98/0x1d0
>>>>>>>> [<0>] handle_syscall+0xb8/0x158
>>>>>>>>
>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>>>>>>>> ---
>>>>>>>> Cc: Huacai Chen <chenhuacai@kernel.org>
>>>>>>>> Cc: WANG Xuerui <kernel@xen0n.name>
>>>>>>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>>>>>>>> Cc: Bibo Mao <maobibo@loongson.cn>
>>>>>>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>>>>>>>> Cc: Xianglai Li <lixianglai@loongson.cn>
>>>>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>>>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>>>>>>>
>>>>>>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>>>>>>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/kvm/switch.S 
>>>>>>>> b/arch/loongarch/kvm/switch.S
>>>>>>>> index 93845ce53651..a3ea9567dbe5 100644
>>>>>>>> --- a/arch/loongarch/kvm/switch.S
>>>>>>>> +++ b/arch/loongarch/kvm/switch.S
>>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>>   #include <asm/loongarch.h>
>>>>>>>>   #include <asm/regdef.h>
>>>>>>>>   #include <asm/unwind_hints.h>
>>>>>>>> +#include <linux/kvm_types.h>
>>>>>>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>>>>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>>>>>> @@ -110,9 +111,9 @@
>>>>>>>>        * need to copy world switch code to DMW area.
>>>>>>>>        */
>>>>>>>>       .text
>>>>>>>> +    .p2align PAGE_SHIFT
>>>>>>>>       .cfi_sections    .debug_frame
>>>>>>>>   SYM_CODE_START(kvm_exc_entry)
>>>>>>>> -    .p2align PAGE_SHIFT
>>>>>>>>       UNWIND_HINT_UNDEFINED
>>>>>>>>       csrwr    a2,   KVM_TEMP_KS
>>>>>>>>       csrrd    a2,   KVM_VCPU_KS
>>>>>>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>>>>>>>       /* restore per cpu register */
>>>>>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>>>>>>       addi.d    sp, sp, -PT_SIZE
>>>>>>>> +    UNWIND_HINT_REGS
>>>>>>>>         /* Prepare handle exception */
>>>>>>>>       or    a0, s0, zero
>>>>>>>> @@ -200,7 +202,7 @@ ret_to_host:
>>>>>>>>       jr      ra
>>>>>>>>     SYM_CODE_END(kvm_exc_entry)
>>>>>>>> -EXPORT_SYMBOL(kvm_exc_entry)
>>>>>>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>>>>>>>     /*
>>>>>>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu 
>>>>>>>> *vcpu)
>>>>>>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>>>>>>>       /* Save host GPRs */
>>>>>>>>       kvm_save_host_gpr a2
>>>>>>>>   +    /*
>>>>>>>> +     * The csr_era member variable of the pt_regs structure is 
>>>>>>>> required
>>>>>>>> +     * for unwinding orc to perform stack traceback, so we need 
>>>>>>>> to put
>>>>>>>> +     * pc into csr_era member variable here.
>>>>>>>> +     */
>>>>>>>> +    pcaddi    t0, 0
>>>>>>>> +    st.d    t0, a2, PT_ERA
>>>>>>> Hi, Xianglai,
>>>>>>>
>>>>>>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` 
>>>>>>> rather than
>>>>>>> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark 
>>>>>>> "C-likely"
>>>>>>> asm functions.
>>>>>>
>>>>>> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the next 
>>>>>> version.
>>>>>>
>>>>>>> I guess the kvm_enter_guest is something like exception
>>>>>>> handler because the last instruction is "ertn". So usually it should
>>>>>>> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
>>>>>>> However, all info is store to "$a2", this mark should be
>>>>>>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
>>>>>>> I don't know why this function's internal PC is saved here by `pcaddi 
>>>>>>> t0, 0`,
>>>>>>> and I think it is no meaning(, for exception handler, they save 
>>>>>>> last PC
>>>>>>> by read CSR.ERA). The `kvm_enter_guest` saves registers by
>>>>>>> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
>>>>>>> is enable. So I wonder if there is really a stacktrace through 
>>>>>>> this function?
>>>>>>>
>>>>>> The stack backtracking issue in switch.S is rather complex because 
>>>>>> it involves the switching between cpu root-mode and guest-mode:
>>>>>> Real stack backtracking should be divided into two parts:
>>>>>> part 1:
>>>>>>     [<0>] kvm_enter_guest+0x38/0x11c
>>>>>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>>>     [<0>] sys_ioctl+0x498/0xf00
>>>>>>     [<0>] do_syscall+0x98/0x1d0
>>>>>>     [<0>] handle_syscall+0xb8/0x158
>>>>>>
>>>>>> part 2:
>>>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>>
>>>>>>
>>>>>> In "part 1", after executing kvm_enter_guest, the cpu switches 
>>>>>> from root-mode to guest-mode.
>>>>>> In this case, stack backtracking is indeed very rare.
>>>>>>
>>>>>> In "part 2", the cpu switches from the guest-mode to the root-mode,
>>>>>> and most of the stack backtracking occurs during this phase.
>>>>>>
>>>>>> To obtain the longest call chain, we save pc in kvm_enter_guest to 
>>>>>> pt_regs.csr_era,
>>>>>> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
>>>>>> The ORC entry was re-established using "UNWIND_HINT_REGS",
>>>>>>  and then we obtained the following stack backtrace as we wanted:
>>>>>>
>>>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>>> I found this might be a coincidence—correct behavior due to the 
>>>>> incorrect
>>>>> UNWIND_HINT_REGS mark and unusual stack adjustment.
>>>>>
>>>>> First, the kvm_enter_guest contains only a single branch 
>>>>> instruction, ertn.
>>>>> It hardware-jump to the CSR.ERA address directly, jump into 
>>>>> kvm_exc_entry.
>>>>>
>>>>> At this point, the stack layout looks like this:
>>>>> -------------------------------
>>>>>   frame from call to `kvm_enter_guest`
>>>>> -------------------------------  <- $sp
>>>>>   PT_REGS
>>>>> -------------------------------  <- $a2
>>>>>
>>>>> Then kvm_exc_entry adjust stack without save any register (e.g. 
>>>>> $ra, $sp)
>>>>> but still marked UNWIND_HINT_REGS.
>>>>> After the adjustment:
>>>>> -------------------------------
>>>>>   frame from call to `kvm_enter_guest`
>>>>> -------------------------------
>>>>>   PT_REGS
>>>>> -------------------------------  <- $a2, new $sp
>>>>>
>>>>> During unwinding, when the unwinder reaches kvm_exc_entry,
>>>>> it meets the mark of PT_REGS and correctly recovers
>>>>>  pc = regs.csr_era, sp = regs.sp, ra = regs.ra
>>>>>
>>>> Yes, here unwinder does work as you say.
>>>>
>>>>> a) Can we avoid "ertn" rather than `jr reg (or jirl ra, reg, 0)`
>>>>> instead, like call?
>>>>> No, we need to rely on the 'ertn' instruction to restore PIE to CRMD.IE,
>>>>> while at the same time ensuring that it is atomic;
>>>>> there is no instruction more appropriate here than 'ertn'.
>>> You are right! I got it.
>>>>
>>>>> The kvm_exc_entry cannot back to kvm_enter_guest
>>>>> if we use "ertn", so should the kvm_enter_guest appear on the 
>>>>> stacktrace?
>>>>>
>>>>
>>>> It is flexible. As I mentioned above, the cpu completes the switch 
>>>> from host-mode to guest mode through kvm_enter_guest,
>>>> and then the switch from guest mode to host-mode through 
>>>> kvm_exc_entry. When we ignore the details of the host-mode
>>>> and guest-mode switching in the middle, we can understand that the 
>>>> host cpu has completed kvm_enter_guest->kvm_exc_entry.
>>>> From this perspective, I think it can exist in the call stack, and 
>>>> at the same time, we have obtained the maximum call stack information.
>>>>
>>>>
>>>>> b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
>>>>> UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
>>>>> like ret_from_kernel_thread_asm.
>>>>>
>>>> The following command can be used to dump the orc entries of the 
>>>> kernel:
>>>> ./tools/objtool/objtool --dump vmlinux
>>>>
>>>> You can observe that not all orc entries are generated at the 
>>>> beginning of the function.
>>>> For example:
>>>> handle_tlb_protect
>>>> ftrace_stub
>>>> handle_reserved
>>>>
>>>> So, is it unnecessary for us to modify UNWIND_HINT_REGS in order to 
>>>> place it at the beginning of the function.
>>>>
>>>> If you have a better solution, could you provide an example of the 
>>>> modification?
>>>> I can test the feasibility of the solution.
>>>>
>>> The expression at the beginning of the function is incorrect (feeling 
>>> sorry).
>>> It should be marked where have all stacktrace info.
>>> Thanks for all the explaining, since I'm unfamiliar with kvm, I need 
>>> these to help my understanding.
>>>
>>> Can you try with follows, with save regs by $sp, set more precise era 
>>> to pt_regs, and more unwind hint.
>>>
>>>
>>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>>> index f1768b7a6194..8ed1d7b72c54 100644
>>> --- a/arch/loongarch/kvm/switch.S
>>> +++ b/arch/loongarch/kvm/switch.S
>>> @@ -14,13 +14,13 @@
>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>
>>>   .macro kvm_save_host_gpr base
>>> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
>>> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>>>       st.d    $r\n, \base, HGPR_OFFSET(\n)
>>>       .endr
>>>   .endm
>>>
>>>   .macro kvm_restore_host_gpr base
>>> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
>>> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>>>       ld.d    $r\n, \base, HGPR_OFFSET(\n)
>>>       .endr
>>>   .endm
>>> @@ -88,6 +88,7 @@
>>>       /* Load KVM_ARCH register */
>>>       ld.d    a2, a2,    (KVM_ARCH_GGPR + 8 * REG_A2)
>>>
>>> +111:
>>>       ertn /* Switch to guest: GSTAT.PGM = 1, ERRCTL.ISERR = 0, 
>>> TLBRPRMD.ISTLBR = 0 */
>>>   .endm
>>>
>>> @@ -158,9 +159,10 @@ SYM_CODE_START(kvm_exc_entry)
>>>       csrwr    t0, LOONGARCH_CSR_GTLBC
>>>       ld.d    tp, a2, KVM_ARCH_HTP
>>>       ld.d    sp, a2, KVM_ARCH_HSP
>>> +    UNWIND_HINT_REGS
>>> +
>>>       /* restore per cpu register */
>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>> -    addi.d    sp, sp, -PT_SIZE
>>>
>>>       /* Prepare handle exception */
>>>       or    a0, s0, zero
>>> @@ -184,10 +186,11 @@ SYM_CODE_START(kvm_exc_entry)
>>>       csrwr    s1, KVM_VCPU_KS
>>>       kvm_switch_to_guest
>>>
>>> +    UNWIND_HINT_UNDEFINED
>>>   ret_to_host:
>>> -    ld.d    a2, a2, KVM_ARCH_HSP
>>> -    addi.d  a2, a2, -PT_SIZE
>>> -    kvm_restore_host_gpr    a2
>>> +    ld.d    sp, a2, KVM_ARCH_HSP
>>> +    kvm_restore_host_gpr    sp
>>> +    addi.d    sp, sp, PT_SIZE
>>>       jr      ra
>>>
>>>   SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL)
>>> @@ -200,11 +203,15 @@ SYM_CODE_END(kvm_exc_entry)
>>>    *  a0: kvm_run* run
>>>    *  a1: kvm_vcpu* vcpu
>>>    */
>>> -SYM_FUNC_START(kvm_enter_guest)
>>> +SYM_CODE_START(kvm_enter_guest)
>>> +    UNWIND_HINT_UNDEFINED
>>>       /* Allocate space in stack bottom */
>>> -    addi.d    a2, sp, -PT_SIZE
>>> +    addi.d    sp, sp, -PT_SIZE
>>>       /* Save host GPRs */
>>> -    kvm_save_host_gpr a2
>>> +    kvm_save_host_gpr sp
>>> +    la.pcrel a2, 111f
>>> +    st.d     a2, sp, PT_ERA
>>> +    UNWIND_HINT_REGS
>>>
>> why the label 111f is more accurate?  Supposing there is hw breakpoint 
>> here and backtrace is called, what is the call trace stack then? 
>> obvious label 111f is not executed instead.
> Xianglai said marking it as regs can get more stack infos, so I use
> UNWIND_HINT_REGS marked here, though it not called. Remove
> UNWIND_HINT_REGS thenforbid unwind from here.
> This function is called and should usually be marked as "call",
> but it is complex by switching the stack and use `ertn` calls
> another function.
> 
> 
>>
>> UNWIND_HINT_REGS is used for nested kernel stack, is that right?
>> With nested interrupt and exception handlers on LoongArch kernel, is 
>> UNWIND_HINT_REGS used?
>>
>> SYM_CODE_START(ret_from_fork_asm)
>>         UNWIND_HINT_REGS
>>         move            a1, sp
>>         bl              ret_from_fork
>>         STACKLEAK_ERASE
>>         RESTORE_STATIC
>>         RESTORE_SOME
>>         RESTORE_SP_AND_RET
>> SYM_CODE_END(ret_from_fork_asm)
>> With this piece of code, what is contents of pt_regs? In generic it is 
>> called from sys_clone, era is user PC address, is that right? If so,
>> what is detailed usage in the beginning of ret_from_fork_asm?
> The stacktrace shows the control flow where the PC will go back, so
> it is right because when PC is in ret_from_fork_asm, it is already
> another thread. The era means it will go back user mode.
The problem is that a user-mode era makes the unwinder report an error, and
user_mode(regs) is not accurate. Here is the relevant piece of code.
                 pc = regs->csr_era;
                 if (!__kernel_text_address(pc))
                         goto err;
Will UNWIND_HINT_END_OF_STACK be better than UNWIND_HINT_REGS?

Regards
Bibo Mao
> 
> Jinyang
>>
>> Regards
>> Bibo Mao
>>
>>>       addi.d    a2, a1, KVM_VCPU_ARCH
>>>       st.d    sp, a2, KVM_ARCH_HSP
>>>
>>> Jinyang
>>>
> 

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Jinyang He 1 month, 1 week ago
On 2025-12-30 12:03, Bibo Mao wrote:

>
>
> On 2025/12/30 上午11:36, Jinyang He wrote:
>> On 2025-12-30 10:24, Bibo Mao wrote:
>>
>>>
>>>
>>> On 2025/12/29 下午6:41, Jinyang He wrote:
>>>> On 2025-12-29 18:11, lixianglai wrote:
>>>>
>>>>> Hi Jinyang:
>>>>>>
>>>>>> On 2025-12-29 11:53, lixianglai wrote:
>>>>>>> Hi Jinyang:
>>>>>>>> On 2025-12-27 09:27, Xianglai Li wrote:
>>>>>>>>
>>>>>>>>> Insert the appropriate UNWIND macro definition into the 
>>>>>>>>> kvm_exc_entry in
>>>>>>>>> the assembly function to guide the generation of correct ORC 
>>>>>>>>> table entries,
>>>>>>>>> thereby solving the timeout problem of loading the 
>>>>>>>>> livepatch-sample module
>>>>>>>>> on a physical machine running multiple vcpus virtual machines.
>>>>>>>>>
>>>>>>>>> While solving the above problems, we have gained an additional 
>>>>>>>>> benefit,
>>>>>>>>> that is, we can obtain more call stack information
>>>>>>>>>
>>>>>>>>> Stack information that can be obtained before the problem is 
>>>>>>>>> fixed:
>>>>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>>>> [<0>] kvm_exc_entry+0x100/0x1e0
>>>>>>>>>
>>>>>>>>> Stack information that can be obtained after the problem is 
>>>>>>>>> fixed:
>>>>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>>>> [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>>>>> [<0>] kvm_enter_guest+0x38/0x11c
>>>>>>>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>>>>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>>>>>> [<0>] sys_ioctl+0x498/0xf00
>>>>>>>>> [<0>] do_syscall+0x98/0x1d0
>>>>>>>>> [<0>] handle_syscall+0xb8/0x158
>>>>>>>>>
>>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>>>>>>>>> ---
>>>>>>>>> Cc: Huacai Chen <chenhuacai@kernel.org>
>>>>>>>>> Cc: WANG Xuerui <kernel@xen0n.name>
>>>>>>>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>>>>>>>>> Cc: Bibo Mao <maobibo@loongson.cn>
>>>>>>>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>>>>>>>>> Cc: Xianglai Li <lixianglai@loongson.cn>
>>>>>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>>>>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>>>>>>>>
>>>>>>>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>>>>>>>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/loongarch/kvm/switch.S 
>>>>>>>>> b/arch/loongarch/kvm/switch.S
>>>>>>>>> index 93845ce53651..a3ea9567dbe5 100644
>>>>>>>>> --- a/arch/loongarch/kvm/switch.S
>>>>>>>>> +++ b/arch/loongarch/kvm/switch.S
>>>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>>>   #include <asm/loongarch.h>
>>>>>>>>>   #include <asm/regdef.h>
>>>>>>>>>   #include <asm/unwind_hints.h>
>>>>>>>>> +#include <linux/kvm_types.h>
>>>>>>>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>>>>>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>>>>>>> @@ -110,9 +111,9 @@
>>>>>>>>>        * need to copy world switch code to DMW area.
>>>>>>>>>        */
>>>>>>>>>       .text
>>>>>>>>> +    .p2align PAGE_SHIFT
>>>>>>>>>       .cfi_sections    .debug_frame
>>>>>>>>>   SYM_CODE_START(kvm_exc_entry)
>>>>>>>>> -    .p2align PAGE_SHIFT
>>>>>>>>>       UNWIND_HINT_UNDEFINED
>>>>>>>>>       csrwr    a2,   KVM_TEMP_KS
>>>>>>>>>       csrrd    a2,   KVM_VCPU_KS
>>>>>>>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>>>>>>>>       /* restore per cpu register */
>>>>>>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>>>>>>>       addi.d    sp, sp, -PT_SIZE
>>>>>>>>> +    UNWIND_HINT_REGS
>>>>>>>>>         /* Prepare handle exception */
>>>>>>>>>       or    a0, s0, zero
>>>>>>>>> @@ -200,7 +202,7 @@ ret_to_host:
>>>>>>>>>       jr      ra
>>>>>>>>>     SYM_CODE_END(kvm_exc_entry)
>>>>>>>>> -EXPORT_SYMBOL(kvm_exc_entry)
>>>>>>>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>>>>>>>>     /*
>>>>>>>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu 
>>>>>>>>> *vcpu)
>>>>>>>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>>>>>>>>       /* Save host GPRs */
>>>>>>>>>       kvm_save_host_gpr a2
>>>>>>>>>   +    /*
>>>>>>>>> +     * The csr_era member variable of the pt_regs structure 
>>>>>>>>> is required
>>>>>>>>> +     * for unwinding orc to perform stack traceback, so we 
>>>>>>>>> need to put
>>>>>>>>> +     * pc into csr_era member variable here.
>>>>>>>>> +     */
>>>>>>>>> +    pcaddi    t0, 0
>>>>>>>>> +    st.d    t0, a2, PT_ERA
>>>>>>>> Hi, Xianglai,
>>>>>>>>
>>>>>>>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` 
>>>>>>>> rather than
>>>>>>>> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark 
>>>>>>>> "C-likely"
>>>>>>>> asm functions.
>>>>>>>
>>>>>>> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the 
>>>>>>> next version.
>>>>>>>
>>>>>>>> I guess the kvm_enter_guest is something like exception
>>>>>>>> handler because the last instruction is "ertn". So usually it 
>>>>>>>> should
>>>>>>>> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
>>>>>>>> However, all info is store to "$a2", this mark should be
>>>>>>>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
>>>>>>>> I don't know why this function's internal PC is saved here by `pcaddi 
>>>>>>>> t0, 0`,
>>>>>>>> and I think it is no meaning(, for exception handler, they save 
>>>>>>>> last PC
>>>>>>>> by read CSR.ERA). The `kvm_enter_guest` saves registers by
>>>>>>>> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
>>>>>>>> is enable. So I wonder if there is really a stacktrace through 
>>>>>>>> this function?
>>>>>>>>
>>>>>>> The stack backtracking issue in switch.S is rather complex 
>>>>>>> because it involves the switching between cpu root-mode and 
>>>>>>> guest-mode:
>>>>>>> Real stack backtracking should be divided into two parts:
>>>>>>> part 1:
>>>>>>>     [<0>] kvm_enter_guest+0x38/0x11c
>>>>>>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>>>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>>>>     [<0>] sys_ioctl+0x498/0xf00
>>>>>>>     [<0>] do_syscall+0x98/0x1d0
>>>>>>>     [<0>] handle_syscall+0xb8/0x158
>>>>>>>
>>>>>>> part 2:
>>>>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>>>
>>>>>>>
>>>>>>> In "part 1", after executing kvm_enter_guest, the cpu switches 
>>>>>>> from root-mode to guest-mode.
>>>>>>> In this case, stack backtracking is indeed very rare.
>>>>>>>
>>>>>>> In "part 2", the cpu switches from the guest-mode to the root-mode,
>>>>>>> and most of the stack backtracking occurs during this phase.
>>>>>>>
>>>>>>> To obtain the longest call chain, we save pc in kvm_enter_guest 
>>>>>>> to pt_regs.csr_era,
>>>>>>> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
>>>>>>> The ORC entry was re-established using "UNWIND_HINT_REGS",
>>>>>>>  and then we obtained the following stack backtrace as we wanted:
>>>>>>>
>>>>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>> I found this might be a coincidence—correct behavior due to the 
>>>>>> incorrect
>>>>>> UNWIND_HINT_REGS mark and unusual stack adjustment.
>>>>>>
>>>>>> First, the kvm_enter_guest contains only a single branch 
>>>>>> instruction, ertn.
>>>>>> It hardware-jump to the CSR.ERA address directly, jump into 
>>>>>> kvm_exc_entry.
>>>>>>
>>>>>> At this point, the stack layout looks like this:
>>>>>> -------------------------------
>>>>>>   frame from call to `kvm_enter_guest`
>>>>>> -------------------------------  <- $sp
>>>>>>   PT_REGS
>>>>>> -------------------------------  <- $a2
>>>>>>
>>>>>> Then kvm_exc_entry adjust stack without save any register (e.g. 
>>>>>> $ra, $sp)
>>>>>> but still marked UNWIND_HINT_REGS.
>>>>>> After the adjustment:
>>>>>> -------------------------------
>>>>>>   frame from call to `kvm_enter_guest`
>>>>>> -------------------------------
>>>>>>   PT_REGS
>>>>>> -------------------------------  <- $a2, new $sp
>>>>>>
>>>>>> During unwinding, when the unwinder reaches kvm_exc_entry,
>>>>>> it meets the mark of PT_REGS and correctly recovers
>>>>>>  pc = regs.csr_era, sp = regs.sp, ra = regs.ra
>>>>>>
>>>>> Yes, here unwinder does work as you say.
>>>>>
>>>>>> a) Can we avoid "ertn" rather than `jr reg (or jirl ra, reg, 0)`
>>>>>> instead, like call?
>>>>> No,  we need to rely on the 'ertn instruction return PIE to CRMD IE,
>>>>> at the same time to ensure that its atomic,
>>>>> there should be no other instruction than' ertn 'more appropriate 
>>>>> here.
>>>> You are right! I got it.
>>>>>
>>>>>> The kvm_exc_entry cannot back to kvm_enter_guest
>>>>>> if we use "ertn", so should the kvm_enter_guest appear on the 
>>>>>> stacktrace?
>>>>>>
>>>>>
>>>>> It is flexible. As I mentioned above, the cpu completes the switch 
>>>>> from host-mode to guest mode through kvm_enter_guest,
>>>>> and then the switch from guest mode to host-mode through 
>>>>> kvm_exc_entry. When we ignore the details of the host-mode
>>>>> and guest-mode switching in the middle, we can understand that the 
>>>>> host cpu has completed kvm_enter_guest->kvm_exc_entry.
>>>>> From this perspective, I think it can exist in the call stack, and 
>>>>> at the same time, we have obtained the maximum call stack 
>>>>> information.
>>>>>
>>>>>
>>>>>> b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
>>>>>> UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
>>>>>> like ret_from_kernel_thread_asm.
>>>>>>
>>>>> The following command can be used to dump the orc entries of the 
>>>>> kernel:
>>>>> ./tools/objtool/objtool --dump vmlinux
>>>>>
>>>>> You can observe that not all orc entries are generated at the 
>>>>> beginning of the function.
>>>>> For example:
>>>>> handle_tlb_protect
>>>>> ftrace_stub
>>>>> handle_reserved
>>>>>
>>>>> So, is it unnecessary for us to modify UNWIND_HINT_REGS in order 
>>>>> to place it at the beginning of the function.
>>>>>
>>>>> If you have a better solution, could you provide an example of the 
>>>>> modification?
>>>>> I can test the feasibility of the solution.
>>>>>
>>>> The expression at the beginning of the function is incorrect 
>>>> (feeling sorry).
>>>> It should be marked where have all stacktrace info.
>>>> Thanks for all the explaining, since I'm unfamiliar with kvm, I 
>>>> need these to help my understanding.
>>>>
>>>> Can you try with follows, with save regs by $sp, set more precise 
>>>> era to pt_regs, and more unwind hint.
>>>>
>>>>
>>>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>>>> index f1768b7a6194..8ed1d7b72c54 100644
>>>> --- a/arch/loongarch/kvm/switch.S
>>>> +++ b/arch/loongarch/kvm/switch.S
>>>> @@ -14,13 +14,13 @@
>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>>
>>>>   .macro kvm_save_host_gpr base
>>>> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
>>>> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>>>>       st.d    $r\n, \base, HGPR_OFFSET(\n)
>>>>       .endr
>>>>   .endm
>>>>
>>>>   .macro kvm_restore_host_gpr base
>>>> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
>>>> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>>>>       ld.d    $r\n, \base, HGPR_OFFSET(\n)
>>>>       .endr
>>>>   .endm
>>>> @@ -88,6 +88,7 @@
>>>>       /* Load KVM_ARCH register */
>>>>       ld.d    a2, a2,    (KVM_ARCH_GGPR + 8 * REG_A2)
>>>>
>>>> +111:
>>>>       ertn /* Switch to guest: GSTAT.PGM = 1, ERRCTL.ISERR = 0, 
>>>> TLBRPRMD.ISTLBR = 0 */
>>>>   .endm
>>>>
>>>> @@ -158,9 +159,10 @@ SYM_CODE_START(kvm_exc_entry)
>>>>       csrwr    t0, LOONGARCH_CSR_GTLBC
>>>>       ld.d    tp, a2, KVM_ARCH_HTP
>>>>       ld.d    sp, a2, KVM_ARCH_HSP
>>>> +    UNWIND_HINT_REGS
>>>> +
>>>>       /* restore per cpu register */
>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>> -    addi.d    sp, sp, -PT_SIZE
>>>>
>>>>       /* Prepare handle exception */
>>>>       or    a0, s0, zero
>>>> @@ -184,10 +186,11 @@ SYM_CODE_START(kvm_exc_entry)
>>>>       csrwr    s1, KVM_VCPU_KS
>>>>       kvm_switch_to_guest
>>>>
>>>> +    UNWIND_HINT_UNDEFINED
>>>>   ret_to_host:
>>>> -    ld.d    a2, a2, KVM_ARCH_HSP
>>>> -    addi.d  a2, a2, -PT_SIZE
>>>> -    kvm_restore_host_gpr    a2
>>>> +    ld.d    sp, a2, KVM_ARCH_HSP
>>>> +    kvm_restore_host_gpr    sp
>>>> +    addi.d    sp, sp, PT_SIZE
>>>>       jr      ra
>>>>
>>>>   SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL)
>>>> @@ -200,11 +203,15 @@ SYM_CODE_END(kvm_exc_entry)
>>>>    *  a0: kvm_run* run
>>>>    *  a1: kvm_vcpu* vcpu
>>>>    */
>>>> -SYM_FUNC_START(kvm_enter_guest)
>>>> +SYM_CODE_START(kvm_enter_guest)
>>>> +    UNWIND_HINT_UNDEFINED
>>>>       /* Allocate space in stack bottom */
>>>> -    addi.d    a2, sp, -PT_SIZE
>>>> +    addi.d    sp, sp, -PT_SIZE
>>>>       /* Save host GPRs */
>>>> -    kvm_save_host_gpr a2
>>>> +    kvm_save_host_gpr sp
>>>> +    la.pcrel a2, 111f
>>>> +    st.d     a2, sp, PT_ERA
>>>> +    UNWIND_HINT_REGS
>>>>
>>> why the label 111f is more accurate?  Supposing there is hw 
>>> breakpoint here and backtrace is called, what is the call trace 
>>> stack then? obvious label 111f is not executed instead.
>> Xianglai said marking it as regs can get more stack infos, so I use
>> UNWIND_HINT_REGS to mark it here, though it is not called. Removing
>> UNWIND_HINT_REGS then forbids unwinding from here.
>> This function is called and should usually be marked as "call",
>> but it is complex by switching the stack and use `ertn` calls
>> another function.
>>
>>
>>>
>>> UNWIND_HINT_REGS is used for nested kernel stack, is that right?
>>> With nested interrupt and exception handlers on LoongArch kernel, is 
>>> UNWIND_HINT_REGS used?
>>>
>>> SYM_CODE_START(ret_from_fork_asm)
>>>         UNWIND_HINT_REGS
>>>         move            a1, sp
>>>         bl              ret_from_fork
>>>         STACKLEAK_ERASE
>>>         RESTORE_STATIC
>>>         RESTORE_SOME
>>>         RESTORE_SP_AND_RET
>>> SYM_CODE_END(ret_from_fork_asm)
>>> With this piece of code, what is contents of pt_regs? In generic it 
>>> is called from sys_clone, era is user PC address, is that right? If so,
>>> what is detailed usage in the beginning of ret_from_fork_asm?
>> The stacktrace shows the control flow where the PC will go back, so
>> it is right because when PC is in ret_from_fork_asm, it is already
>> another thread. The era means it will go back user mode.
> The problem is that user mode era shows unwind with error, and 
> user_mode(regs) is not accurate. here is piece of code.
>                 pc = regs->csr_era;
>                 if (!__kernel_text_address(pc))
>                         goto err;
> will UNWIND_HINT_END_OF_STACK be better than UNWIND_HINT_REGS?

You are right. And the reason why the current unwinder does not cause an
error is that, in the ORC_TYPE_REGS case, we process it via user_mode(regs).

Jinyang

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Bibo Mao 1 month, 1 week ago

On 2025/12/30 下午1:53, Jinyang He wrote:
> On 2025-12-30 12:03, Bibo Mao wrote:
> 
>>
>>
>> On 2025/12/30 上午11:36, Jinyang He wrote:
>>> On 2025-12-30 10:24, Bibo Mao wrote:
>>>
>>>>
>>>>
>>>> On 2025/12/29 下午6:41, Jinyang He wrote:
>>>>> On 2025-12-29 18:11, lixianglai wrote:
>>>>>
>>>>>> Hi Jinyang:
>>>>>>>
>>>>>>> On 2025-12-29 11:53, lixianglai wrote:
>>>>>>>> Hi Jinyang:
>>>>>>>>> On 2025-12-27 09:27, Xianglai Li wrote:
>>>>>>>>>
>>>>>>>>>> Insert the appropriate UNWIND macro definition into the 
>>>>>>>>>> kvm_exc_entry in
>>>>>>>>>> the assembly function to guide the generation of correct ORC 
>>>>>>>>>> table entries,
>>>>>>>>>> thereby solving the timeout problem of loading the 
>>>>>>>>>> livepatch-sample module
>>>>>>>>>> on a physical machine running multiple vcpus virtual machines.
>>>>>>>>>>
>>>>>>>>>> While solving the above problems, we have gained an additional 
>>>>>>>>>> benefit,
>>>>>>>>>> that is, we can obtain more call stack information
>>>>>>>>>>
>>>>>>>>>> Stack information that can be obtained before the problem is 
>>>>>>>>>> fixed:
>>>>>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>>>>> [<0>] kvm_exc_entry+0x100/0x1e0
>>>>>>>>>>
>>>>>>>>>> Stack information that can be obtained after the problem is 
>>>>>>>>>> fixed:
>>>>>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>>>>> [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>>>>>> [<0>] kvm_enter_guest+0x38/0x11c
>>>>>>>>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>>>>>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>>>>>>> [<0>] sys_ioctl+0x498/0xf00
>>>>>>>>>> [<0>] do_syscall+0x98/0x1d0
>>>>>>>>>> [<0>] handle_syscall+0xb8/0x158
>>>>>>>>>>
>>>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>>>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>>>>>>>>>> ---
>>>>>>>>>> Cc: Huacai Chen <chenhuacai@kernel.org>
>>>>>>>>>> Cc: WANG Xuerui <kernel@xen0n.name>
>>>>>>>>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>>>>>>>>>> Cc: Bibo Mao <maobibo@loongson.cn>
>>>>>>>>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>>>>>>>>>> Cc: Xianglai Li <lixianglai@loongson.cn>
>>>>>>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>>>>>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>>>>>>>>>
>>>>>>>>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>>>>>>>>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/loongarch/kvm/switch.S 
>>>>>>>>>> b/arch/loongarch/kvm/switch.S
>>>>>>>>>> index 93845ce53651..a3ea9567dbe5 100644
>>>>>>>>>> --- a/arch/loongarch/kvm/switch.S
>>>>>>>>>> +++ b/arch/loongarch/kvm/switch.S
>>>>>>>>>> @@ -10,6 +10,7 @@
>>>>>>>>>>   #include <asm/loongarch.h>
>>>>>>>>>>   #include <asm/regdef.h>
>>>>>>>>>>   #include <asm/unwind_hints.h>
>>>>>>>>>> +#include <linux/kvm_types.h>
>>>>>>>>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>>>>>>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>>>>>>>> @@ -110,9 +111,9 @@
>>>>>>>>>>        * need to copy world switch code to DMW area.
>>>>>>>>>>        */
>>>>>>>>>>       .text
>>>>>>>>>> +    .p2align PAGE_SHIFT
>>>>>>>>>>       .cfi_sections    .debug_frame
>>>>>>>>>>   SYM_CODE_START(kvm_exc_entry)
>>>>>>>>>> -    .p2align PAGE_SHIFT
>>>>>>>>>>       UNWIND_HINT_UNDEFINED
>>>>>>>>>>       csrwr    a2,   KVM_TEMP_KS
>>>>>>>>>>       csrrd    a2,   KVM_VCPU_KS
>>>>>>>>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>>>>>>>>>       /* restore per cpu register */
>>>>>>>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>>>>>>>>       addi.d    sp, sp, -PT_SIZE
>>>>>>>>>> +    UNWIND_HINT_REGS
>>>>>>>>>>         /* Prepare handle exception */
>>>>>>>>>>       or    a0, s0, zero
>>>>>>>>>> @@ -200,7 +202,7 @@ ret_to_host:
>>>>>>>>>>       jr      ra
>>>>>>>>>>     SYM_CODE_END(kvm_exc_entry)
>>>>>>>>>> -EXPORT_SYMBOL(kvm_exc_entry)
>>>>>>>>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>>>>>>>>>     /*
>>>>>>>>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu 
>>>>>>>>>> *vcpu)
>>>>>>>>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>>>>>>>>>       /* Save host GPRs */
>>>>>>>>>>       kvm_save_host_gpr a2
>>>>>>>>>>   +    /*
>>>>>>>>>> +     * The csr_era member variable of the pt_regs structure 
>>>>>>>>>> is required
>>>>>>>>>> +     * for unwinding orc to perform stack traceback, so we 
>>>>>>>>>> need to put
>>>>>>>>>> +     * pc into csr_era member variable here.
>>>>>>>>>> +     */
>>>>>>>>>> +    pcaddi    t0, 0
>>>>>>>>>> +    st.d    t0, a2, PT_ERA
>>>>>>>>> Hi, Xianglai,
>>>>>>>>>
>>>>>>>>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` 
>>>>>>>>> rather than
>>>>>>>>> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark 
>>>>>>>>> "C-likely"
>>>>>>>>> asm functions.
>>>>>>>>
>>>>>>>> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the 
>>>>>>>> next version.
>>>>>>>>
>>>>>>>>> I guess the kvm_enter_guest is something like exception
>>>>>>>>> handler because the last instruction is "ertn". So usually it 
>>>>>>>>> should
>>>>>>>>> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
>>>>>>>>> However, all info is store to "$a2", this mark should be
>>>>>>>>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
>>>>>>>>> I don't know why the function-internal PC is saved here by `pcaddi 
>>>>>>>>> t0, 0`,
>>>>>>>>> and I think it is no meaning(, for exception handler, they save 
>>>>>>>>> last PC
>>>>>>>>> by read CSR.ERA). The `kvm_enter_guest` saves registers by
>>>>>>>>> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
>>>>>>>>> is enable. So I wonder if there is really a stacktrace through 
>>>>>>>>> this function?
>>>>>>>>>
>>>>>>>> The stack backtracking issue in switch.S is rather complex 
>>>>>>>> because it involves the switching between cpu root-mode and 
>>>>>>>> guest-mode:
>>>>>>>> Real stack backtracking should be divided into two parts:
>>>>>>>> part 1:
>>>>>>>>     [<0>] kvm_enter_guest+0x38/0x11c
>>>>>>>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>>>>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>>>>>     [<0>] sys_ioctl+0x498/0xf00
>>>>>>>>     [<0>] do_syscall+0x98/0x1d0
>>>>>>>>     [<0>] handle_syscall+0xb8/0x158
>>>>>>>>
>>>>>>>> part 2:
>>>>>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>>>>
>>>>>>>>
>>>>>>>> In "part 1", after executing kvm_enter_guest, the cpu switches 
>>>>>>>> from root-mode to guest-mode.
>>>>>>>> In this case, stack backtracking is indeed very rare.
>>>>>>>>
>>>>>>>> In "part 2", the cpu switches from the guest-mode to the root-mode,
>>>>>>>> and most of the stack backtracking occurs during this phase.
>>>>>>>>
>>>>>>>> To obtain the longest call chain, we save pc in kvm_enter_guest 
>>>>>>>> to pt_regs.csr_era,
>>>>>>>> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
>>>>>>>> The ORC entry was re-established using "UNWIND_HINT_REGS",
>>>>>>>>  and then we obtained the following stack backtrace as we wanted:
>>>>>>>>
>>>>>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>>> I found this might be a coincidence—correct behavior due to the 
>>>>>>> incorrect
>>>>>>> UNWIND_HINT_REGS mark and unusual stack adjustment.
>>>>>>>
>>>>>>> First, the kvm_enter_guest contains only a single branch 
>>>>>>> instruction, ertn.
>>>>>>> It hardware-jump to the CSR.ERA address directly, jump into 
>>>>>>> kvm_exc_entry.
>>>>>>>
>>>>>>> At this point, the stack layout looks like this:
>>>>>>> -------------------------------
>>>>>>>   frame from call to `kvm_enter_guest`
>>>>>>> -------------------------------  <- $sp
>>>>>>>   PT_REGS
>>>>>>> -------------------------------  <- $a2
>>>>>>>
>>>>>>> Then kvm_exc_entry adjust stack without save any register (e.g. 
>>>>>>> $ra, $sp)
>>>>>>> but still marked UNWIND_HINT_REGS.
>>>>>>> After the adjustment:
>>>>>>> -------------------------------
>>>>>>>   frame from call to `kvm_enter_guest`
>>>>>>> -------------------------------
>>>>>>>   PT_REGS
>>>>>>> -------------------------------  <- $a2, new $sp
>>>>>>>
>>>>>>> During unwinding, when the unwinder reaches kvm_exc_entry,
>>>>>>> it meets the mark of PT_REGS and correctly recovers
>>>>>>>  pc = regs.csr_era, sp = regs.sp, ra = regs.ra
>>>>>>>
>>>>>> Yes, here unwinder does work as you say.
>>>>>>
>>>>>>> a) Can we avoid "ertn" rather than `jr reg (or jirl ra, reg, 0)`
>>>>>>> instead, like call?
>>>>>> No, we need to rely on the 'ertn' instruction to restore PIE to CRMD.IE,
>>>>>> and at the same time to ensure that it is atomic;
>>>>>> no instruction other than 'ertn' would be more appropriate 
>>>>>> here.
>>>>> You are right! I got it.
>>>>>>
>>>>>>> The kvm_exc_entry cannot back to kvm_enter_guest
>>>>>>> if we use "ertn", so should the kvm_enter_guest appear on the 
>>>>>>> stacktrace?
>>>>>>>
>>>>>>
>>>>>> It is flexible. As I mentioned above, the cpu completes the switch 
>>>>>> from host-mode to guest mode through kvm_enter_guest,
>>>>>> and then the switch from guest mode to host-mode through 
>>>>>> kvm_exc_entry. When we ignore the details of the host-mode
>>>>>> and guest-mode switching in the middle, we can understand that the 
>>>>>> host cpu has completed kvm_enter_guest->kvm_exc_entry.
>>>>>> From this perspective, I think it can exist in the call stack, and 
>>>>>> at the same time, we have obtained the maximum call stack 
>>>>>> information.
>>>>>>
>>>>>>
>>>>>>> b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
>>>>>>> UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
>>>>>>> like ret_from_kernel_thread_asm.
>>>>>>>
>>>>>> The following command can be used to dump the orc entries of the 
>>>>>> kernel:
>>>>>> ./tools/objtool/objtool --dump vmlinux
>>>>>>
>>>>>> You can observe that not all orc entries are generated at the 
>>>>>> beginning of the function.
>>>>>> For example:
>>>>>> handle_tlb_protect
>>>>>> ftrace_stub
>>>>>> handle_reserved
>>>>>>
>>>>>> So, is it unnecessary for us to modify UNWIND_HINT_REGS in order 
>>>>>> to place it at the beginning of the function.
>>>>>>
>>>>>> If you have a better solution, could you provide an example of the 
>>>>>> modification?
>>>>>> I can test the feasibility of the solution.
>>>>>>
>>>>> The expression at the beginning of the function is incorrect 
>>>>> (feeling sorry).
>>>>> It should be marked where have all stacktrace info.
>>>>> Thanks for all the explaining, since I'm unfamiliar with kvm, I 
>>>>> need these to help my understanding.
>>>>>
>>>>> Can you try with follows, with save regs by $sp, set more precise 
>>>>> era to pt_regs, and more unwind hint.
>>>>>
>>>>>
>>>>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>>>>> index f1768b7a6194..8ed1d7b72c54 100644
>>>>> --- a/arch/loongarch/kvm/switch.S
>>>>> +++ b/arch/loongarch/kvm/switch.S
>>>>> @@ -14,13 +14,13 @@
>>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>>>
>>>>>   .macro kvm_save_host_gpr base
>>>>> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
>>>>> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>>>>>       st.d    $r\n, \base, HGPR_OFFSET(\n)
>>>>>       .endr
>>>>>   .endm
>>>>>
>>>>>   .macro kvm_restore_host_gpr base
>>>>> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
>>>>> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>>>>>       ld.d    $r\n, \base, HGPR_OFFSET(\n)
>>>>>       .endr
>>>>>   .endm
>>>>> @@ -88,6 +88,7 @@
>>>>>       /* Load KVM_ARCH register */
>>>>>       ld.d    a2, a2,    (KVM_ARCH_GGPR + 8 * REG_A2)
>>>>>
>>>>> +111:
>>>>>       ertn /* Switch to guest: GSTAT.PGM = 1, ERRCTL.ISERR = 0, 
>>>>> TLBRPRMD.ISTLBR = 0 */
>>>>>   .endm
>>>>>
>>>>> @@ -158,9 +159,10 @@ SYM_CODE_START(kvm_exc_entry)
>>>>>       csrwr    t0, LOONGARCH_CSR_GTLBC
>>>>>       ld.d    tp, a2, KVM_ARCH_HTP
>>>>>       ld.d    sp, a2, KVM_ARCH_HSP
>>>>> +    UNWIND_HINT_REGS
>>>>> +
>>>>>       /* restore per cpu register */
>>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>>> -    addi.d    sp, sp, -PT_SIZE
>>>>>
>>>>>       /* Prepare handle exception */
>>>>>       or    a0, s0, zero
>>>>> @@ -184,10 +186,11 @@ SYM_CODE_START(kvm_exc_entry)
>>>>>       csrwr    s1, KVM_VCPU_KS
>>>>>       kvm_switch_to_guest
>>>>>
>>>>> +    UNWIND_HINT_UNDEFINED
>>>>>   ret_to_host:
>>>>> -    ld.d    a2, a2, KVM_ARCH_HSP
>>>>> -    addi.d  a2, a2, -PT_SIZE
>>>>> -    kvm_restore_host_gpr    a2
>>>>> +    ld.d    sp, a2, KVM_ARCH_HSP
>>>>> +    kvm_restore_host_gpr    sp
>>>>> +    addi.d    sp, sp, PT_SIZE
>>>>>       jr      ra
>>>>>
>>>>>   SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL)
>>>>> @@ -200,11 +203,15 @@ SYM_CODE_END(kvm_exc_entry)
>>>>>    *  a0: kvm_run* run
>>>>>    *  a1: kvm_vcpu* vcpu
>>>>>    */
>>>>> -SYM_FUNC_START(kvm_enter_guest)
>>>>> +SYM_CODE_START(kvm_enter_guest)
>>>>> +    UNWIND_HINT_UNDEFINED
>>>>>       /* Allocate space in stack bottom */
>>>>> -    addi.d    a2, sp, -PT_SIZE
>>>>> +    addi.d    sp, sp, -PT_SIZE
>>>>>       /* Save host GPRs */
>>>>> -    kvm_save_host_gpr a2
>>>>> +    kvm_save_host_gpr sp
>>>>> +    la.pcrel a2, 111f
>>>>> +    st.d     a2, sp, PT_ERA
>>>>> +    UNWIND_HINT_REGS
>>>>>
>>>> why the label 111f is more accurate?  Supposing there is hw 
>>>> breakpoint here and backtrace is called, what is the call trace 
>>>> stack then? obvious label 111f is not executed instead.
>>> Xianglai said marking it as regs can get more stack info, so I
>>> marked UNWIND_HINT_REGS here, though it is not called. Removing
>>> UNWIND_HINT_REGS then forbids unwinding from here.
>>> This function is called and should usually be marked as "call",
>>> but it is complex by switching the stack and use `ertn` calls
>>> another function.
>>>
>>>
>>>>
>>>> UNWIND_HINT_REGS is used for nested kernel stack, is that right?
>>>> With nested interrupt and exception handlers on LoongArch kernel, is 
>>>> UNWIND_HINT_REGS used?
>>>>
>>>> SYM_CODE_START(ret_from_fork_asm)
>>>>         UNWIND_HINT_REGS
>>>>         move            a1, sp
>>>>         bl              ret_from_fork
>>>>         STACKLEAK_ERASE
>>>>         RESTORE_STATIC
>>>>         RESTORE_SOME
>>>>         RESTORE_SP_AND_RET
>>>> SYM_CODE_END(ret_from_fork_asm)
>>>> With this piece of code, what is contents of pt_regs? In generic it 
>>>> is called from sys_clone, era is user PC address, is that right? If so,
>>>> what is detailed usage in the beginning of ret_from_fork_asm?
>>> The stacktrace shows the control flow where the PC will go back, so
>>> it is right because when PC is in ret_from_fork_asm, it is already
>>> another thread. The era means it will go back user mode.
>> The problem is that a user-mode era makes the unwinder report an error, and 
>> user_mode(regs) is not accurate. Here is the piece of code.
>>                 pc = regs->csr_era;
>>                 if (!__kernel_text_address(pc))
>>                         goto err;
>> will UNWIND_HINT_END_OF_STACK be better than UNWIND_HINT_REGS?
> 
> You are right. And the reason why current the unwinder does not cause
> error is in case ORC_TYPE_REGS we process it by user_mode(regs).
user_mode(regs) is unreliable with kernel threads at least :), csr_prmd is 
set for kernel threads with the value csr_read32(LOONGARCH_CSR_PRMD), which 
is per-board rather than per-thread in function copy_thread.

int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
         childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE;

         /* set up new TSS. */
         childregs = (struct pt_regs *) childksp - 1;
         /*  Put the stack after the struct pt_regs.  */
         childksp = (unsigned long) childregs;
         p->thread.csr_prmd = csr_read32(LOONGARCH_CSR_PRMD);
         if (unlikely(args->fn)) {
                 /* kernel thread */
                 childregs->csr_prmd = p->thread.csr_prmd;
                 goto out;
         }

Regards
Bibo Mao

> 
> Jinyang

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Bibo Mao 1 month, 1 week ago

On 2025/12/29 下午6:41, Jinyang He wrote:
> On 2025-12-29 18:11, lixianglai wrote:
> 
>> Hi Jinyang:
>>>
>>> On 2025-12-29 11:53, lixianglai wrote:
>>>> Hi Jinyang:
>>>>> On 2025-12-27 09:27, Xianglai Li wrote:
>>>>>
>>>>>> Insert the appropriate UNWIND macro definition into the 
>>>>>> kvm_exc_entry in
>>>>>> the assembly function to guide the generation of correct ORC table 
>>>>>> entries,
>>>>>> thereby solving the timeout problem of loading the 
>>>>>> livepatch-sample module
>>>>>> on a physical machine running multiple vcpus virtual machines.
>>>>>>
>>>>>> While solving the above problems, we have gained an additional 
>>>>>> benefit,
>>>>>> that is, we can obtain more call stack information
>>>>>>
>>>>>> Stack information that can be obtained before the problem is fixed:
>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>> [<0>] kvm_exc_entry+0x100/0x1e0
>>>>>>
>>>>>> Stack information that can be obtained after the problem is fixed:
>>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>>> [<0>] kvm_exc_entry+0x104/0x1e4
>>>>>> [<0>] kvm_enter_guest+0x38/0x11c
>>>>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>>> [<0>] sys_ioctl+0x498/0xf00
>>>>>> [<0>] do_syscall+0x98/0x1d0
>>>>>> [<0>] handle_syscall+0xb8/0x158
>>>>>>
>>>>>> Cc: stable@vger.kernel.org
>>>>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>>>>>> ---
>>>>>> Cc: Huacai Chen <chenhuacai@kernel.org>
>>>>>> Cc: WANG Xuerui <kernel@xen0n.name>
>>>>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>>>>>> Cc: Bibo Mao <maobibo@loongson.cn>
>>>>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>>>>>> Cc: Xianglai Li <lixianglai@loongson.cn>
>>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>>>>>
>>>>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>>>>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/loongarch/kvm/switch.S 
>>>>>> b/arch/loongarch/kvm/switch.S
>>>>>> index 93845ce53651..a3ea9567dbe5 100644
>>>>>> --- a/arch/loongarch/kvm/switch.S
>>>>>> +++ b/arch/loongarch/kvm/switch.S
>>>>>> @@ -10,6 +10,7 @@
>>>>>>   #include <asm/loongarch.h>
>>>>>>   #include <asm/regdef.h>
>>>>>>   #include <asm/unwind_hints.h>
>>>>>> +#include <linux/kvm_types.h>
>>>>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>>>> @@ -110,9 +111,9 @@
>>>>>>        * need to copy world switch code to DMW area.
>>>>>>        */
>>>>>>       .text
>>>>>> +    .p2align PAGE_SHIFT
>>>>>>       .cfi_sections    .debug_frame
>>>>>>   SYM_CODE_START(kvm_exc_entry)
>>>>>> -    .p2align PAGE_SHIFT
>>>>>>       UNWIND_HINT_UNDEFINED
>>>>>>       csrwr    a2,   KVM_TEMP_KS
>>>>>>       csrrd    a2,   KVM_VCPU_KS
>>>>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>>>>>       /* restore per cpu register */
>>>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>>>>       addi.d    sp, sp, -PT_SIZE
>>>>>> +    UNWIND_HINT_REGS
>>>>>>         /* Prepare handle exception */
>>>>>>       or    a0, s0, zero
>>>>>> @@ -200,7 +202,7 @@ ret_to_host:
>>>>>>       jr      ra
>>>>>>     SYM_CODE_END(kvm_exc_entry)
>>>>>> -EXPORT_SYMBOL(kvm_exc_entry)
>>>>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>>>>>     /*
>>>>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
>>>>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>>>>>       /* Save host GPRs */
>>>>>>       kvm_save_host_gpr a2
>>>>>>   +    /*
>>>>>> +     * The csr_era member variable of the pt_regs structure is 
>>>>>> required
>>>>>> +     * for unwinding orc to perform stack traceback, so we need 
>>>>>> to put
>>>>>> +     * pc into csr_era member variable here.
>>>>>> +     */
>>>>>> +    pcaddi    t0, 0
>>>>>> +    st.d    t0, a2, PT_ERA
>>>>> Hi, Xianglai,
>>>>>
>>>>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` rather 
>>>>> than
>>>>> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark 
>>>>> "C-likely"
>>>>> asm functions. 
>>>>
>>>> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the next 
>>>> version.
>>>>
>>>>> I guess the kvm_enter_guest is something like exception
>>>>> handler because the last instruction is "ertn". So usually it should
>>>>> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
>>>>> However, all info is store to "$a2", this mark should be
>>>>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
>>>>> I don't know why the function-internal PC is saved here by `pcaddi t0, 
>>>>> 0`,
>>>>> and I think it is no meaning(, for exception handler, they save 
>>>>> last PC
>>>>> by read CSR.ERA). The `kvm_enter_guest` saves registers by
>>>>> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
>>>>> is enable. So I wonder if there is really a stacktrace through this 
>>>>> function?
>>>>>
>>>> The stack backtracking issue in switch.S is rather complex because 
>>>> it involves the switching between cpu root-mode and guest-mode:
>>>> Real stack backtracking should be divided into two parts:
>>>> part 1:
>>>>     [<0>] kvm_enter_guest+0x38/0x11c
>>>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>>     [<0>] sys_ioctl+0x498/0xf00
>>>>     [<0>] do_syscall+0x98/0x1d0
>>>>     [<0>] handle_syscall+0xb8/0x158
>>>>
>>>> part 2:
>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>>>
>>>>
>>>> In "part 1", after executing kvm_enter_guest, the cpu switches from 
>>>> root-mode to guest-mode.
>>>> In this case, stack backtracking is indeed very rare.
>>>>
>>>> In "part 2", the cpu switches from the guest-mode to the root-mode,
>>>> and most of the stack backtracking occurs during this phase.
>>>>
>>>> To obtain the longest call chain, we save pc in kvm_enter_guest to 
>>>> pt_regs.csr_era,
>>>> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
>>>> The ORC entry was re-established using "UNWIND_HINT_REGS",
>>>>  and then we obtained the following stack backtrace as we wanted:
>>>>
>>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>> I found this might be a coincidence—correct behavior due to the 
>>> incorrect
>>> UNWIND_HINT_REGS mark and unusual stack adjustment.
>>>
>>> First, the kvm_enter_guest contains only a single branch instruction, 
>>> ertn.
>>> It hardware-jump to the CSR.ERA address directly, jump into 
>>> kvm_exc_entry.
>>>
>>> At this point, the stack layout looks like this:
>>> -------------------------------
>>>   frame from call to `kvm_enter_guest`
>>> -------------------------------  <- $sp
>>>   PT_REGS
>>> -------------------------------  <- $a2
>>>
>>> Then kvm_exc_entry adjust stack without save any register (e.g. $ra, 
>>> $sp)
>>> but still marked UNWIND_HINT_REGS.
>>> After the adjustment:
>>> -------------------------------
>>>   frame from call to `kvm_enter_guest`
>>> -------------------------------
>>>   PT_REGS
>>> -------------------------------  <- $a2, new $sp
>>>
>>> During unwinding, when the unwinder reaches kvm_exc_entry,
>>> it meets the mark of PT_REGS and correctly recovers
>>>  pc = regs.csr_era, sp = regs.sp, ra = regs.ra
>>>
>> Yes, here unwinder does work as you say.
>>
>>> a) Can we avoid "ertn" rather than `jr reg (or jirl ra, reg, 0)`
>>> instead, like call?
>> No, we need to rely on the 'ertn' instruction to restore PIE to CRMD.IE,
>> and at the same time to ensure that it is atomic;
>> no instruction other than 'ertn' would be more appropriate here.
> You are right! I got it.
>>
>>> The kvm_exc_entry cannot back to kvm_enter_guest
>>> if we use "ertn", so should the kvm_enter_guest appear on the 
>>> stacktrace?
>>>
>>
>> It is flexible. As I mentioned above, the cpu completes the switch 
>> from host-mode to guest mode through kvm_enter_guest,
>> and then the switch from guest mode to host-mode through 
>> kvm_exc_entry. When we ignore the details of the host-mode
>> and guest-mode switching in the middle, we can understand that the 
>> host cpu has completed kvm_enter_guest->kvm_exc_entry.
>> From this perspective, I think it can exist in the call stack, and at 
>> the same time, we have obtained the maximum call stack information.
>>
>>
>>> b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
>>> UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
>>> like ret_from_kernel_thread_asm.
>>>
>> The following command can be used to dump the orc entries of the kernel:
>> ./tools/objtool/objtool --dump vmlinux
>>
>> You can observe that not all orc entries are generated at the 
>> beginning of the function.
>> For example:
>> handle_tlb_protect
>> ftrace_stub
>> handle_reserved
>>
>> So, is it unnecessary for us to modify UNWIND_HINT_REGS in order to 
>> place it at the beginning of the function.
>>
>> If you have a better solution, could you provide an example of the 
>> modification?
>> I can test the feasibility of the solution.
>>
> The expression at the beginning of the function is incorrect (feeling 
> sorry).
> It should be marked where have all stacktrace info.
> Thanks for all the explaining, since I'm unfamiliar with kvm, I need 
> these to help my understanding.
> 
> Can you try with follows, with save regs by $sp, set more precise era to 
> pt_regs, and more unwind hint.
> 
> 
> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
> index f1768b7a6194..8ed1d7b72c54 100644
> --- a/arch/loongarch/kvm/switch.S
> +++ b/arch/loongarch/kvm/switch.S
> @@ -14,13 +14,13 @@
>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
> 
>   .macro kvm_save_host_gpr base
> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>       st.d    $r\n, \base, HGPR_OFFSET(\n)
>       .endr
>   .endm
> 
>   .macro kvm_restore_host_gpr base
> -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
> +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
>       ld.d    $r\n, \base, HGPR_OFFSET(\n)
>       .endr
>   .endm
> @@ -88,6 +88,7 @@
>       /* Load KVM_ARCH register */
>       ld.d    a2, a2,    (KVM_ARCH_GGPR + 8 * REG_A2)
> 
> +111:
>       ertn /* Switch to guest: GSTAT.PGM = 1, ERRCTL.ISERR = 0, 
> TLBRPRMD.ISTLBR = 0 */
>   .endm
> 
> @@ -158,9 +159,10 @@ SYM_CODE_START(kvm_exc_entry)
>       csrwr    t0, LOONGARCH_CSR_GTLBC
>       ld.d    tp, a2, KVM_ARCH_HTP
>       ld.d    sp, a2, KVM_ARCH_HSP
> +    UNWIND_HINT_REGS
> +
>       /* restore per cpu register */
>       ld.d    u0, a2, KVM_ARCH_HPERCPU
> -    addi.d    sp, sp, -PT_SIZE
> 
>       /* Prepare handle exception */
>       or    a0, s0, zero
> @@ -184,10 +186,11 @@ SYM_CODE_START(kvm_exc_entry)
>       csrwr    s1, KVM_VCPU_KS
>       kvm_switch_to_guest
> 
> +    UNWIND_HINT_UNDEFINED
>   ret_to_host:
> -    ld.d    a2, a2, KVM_ARCH_HSP
> -    addi.d  a2, a2, -PT_SIZE
> -    kvm_restore_host_gpr    a2
> +    ld.d    sp, a2, KVM_ARCH_HSP
> +    kvm_restore_host_gpr    sp
> +    addi.d    sp, sp, PT_SIZE
>       jr      ra
> 
>   SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL)
> @@ -200,11 +203,15 @@ SYM_CODE_END(kvm_exc_entry)
>    *  a0: kvm_run* run
>    *  a1: kvm_vcpu* vcpu
>    */
> -SYM_FUNC_START(kvm_enter_guest)
> +SYM_CODE_START(kvm_enter_guest)
> +    UNWIND_HINT_UNDEFINED
>       /* Allocate space in stack bottom */
> -    addi.d    a2, sp, -PT_SIZE
> +    addi.d    sp, sp, -PT_SIZE
>       /* Save host GPRs */
> -    kvm_save_host_gpr a2
> +    kvm_save_host_gpr sp
> +    la.pcrel a2, 111f
> +    st.d     a2, sp, PT_ERA
> +    UNWIND_HINT_REGS
> 
>       addi.d    a2, a1, KVM_VCPU_ARCH
>       st.d    sp, a2, KVM_ARCH_HSP
> 
wow, so quick and wonderful.

Huacai,
  the loongarch kernel and kvm maintainer, what is your point about this?

Regards
Bibo Mao
> Jinyang
> 

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Huacai Chen 1 month, 1 week ago
On Tue, Dec 30, 2025 at 9:21 AM Bibo Mao <maobibo@loongson.cn> wrote:
>
>
>
> On 2025/12/29 下午6:41, Jinyang He wrote:
> > On 2025-12-29 18:11, lixianglai wrote:
> >
> >> Hi Jinyang:
> >>>
> >>> On 2025-12-29 11:53, lixianglai wrote:
> >>>> Hi Jinyang:
> >>>>> On 2025-12-27 09:27, Xianglai Li wrote:
> >>>>>
> >>>>>> Insert the appropriate UNWIND macro definition into the
> >>>>>> kvm_exc_entry in
> >>>>>> the assembly function to guide the generation of correct ORC table
> >>>>>> entries,
> >>>>>> thereby solving the timeout problem of loading the
> >>>>>> livepatch-sample module
> >>>>>> on a physical machine running multiple vcpus virtual machines.
> >>>>>>
> >>>>>> While solving the above problems, we have gained an additional
> >>>>>> benefit,
> >>>>>> that is, we can obtain more call stack information
> >>>>>>
> >>>>>> Stack information that can be obtained before the problem is fixed:
> >>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
> >>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
> >>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
> >>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
> >>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
> >>>>>> [<0>] kvm_exc_entry+0x100/0x1e0
> >>>>>>
> >>>>>> Stack information that can be obtained after the problem is fixed:
> >>>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
> >>>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
> >>>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
> >>>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
> >>>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
> >>>>>> [<0>] kvm_exc_entry+0x104/0x1e4
> >>>>>> [<0>] kvm_enter_guest+0x38/0x11c
> >>>>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
> >>>>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
> >>>>>> [<0>] sys_ioctl+0x498/0xf00
> >>>>>> [<0>] do_syscall+0x98/0x1d0
> >>>>>> [<0>] handle_syscall+0xb8/0x158
> >>>>>>
> >>>>>> Cc: stable@vger.kernel.org
> >>>>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
> >>>>>> ---
> >>>>>> Cc: Huacai Chen <chenhuacai@kernel.org>
> >>>>>> Cc: WANG Xuerui <kernel@xen0n.name>
> >>>>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
> >>>>>> Cc: Bibo Mao <maobibo@loongson.cn>
> >>>>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
> >>>>>> Cc: Xianglai Li <lixianglai@loongson.cn>
> >>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
> >>>>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
> >>>>>>
> >>>>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
> >>>>>>   1 file changed, 19 insertions(+), 9 deletions(-)
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/kvm/switch.S
> >>>>>> b/arch/loongarch/kvm/switch.S
> >>>>>> index 93845ce53651..a3ea9567dbe5 100644
> >>>>>> --- a/arch/loongarch/kvm/switch.S
> >>>>>> +++ b/arch/loongarch/kvm/switch.S
> >>>>>> @@ -10,6 +10,7 @@
> >>>>>>   #include <asm/loongarch.h>
> >>>>>>   #include <asm/regdef.h>
> >>>>>>   #include <asm/unwind_hints.h>
> >>>>>> +#include <linux/kvm_types.h>
> >>>>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
> >>>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
> >>>>>> @@ -110,9 +111,9 @@
> >>>>>>        * need to copy world switch code to DMW area.
> >>>>>>        */
> >>>>>>       .text
> >>>>>> +    .p2align PAGE_SHIFT
> >>>>>>       .cfi_sections    .debug_frame
> >>>>>>   SYM_CODE_START(kvm_exc_entry)
> >>>>>> -    .p2align PAGE_SHIFT
> >>>>>>       UNWIND_HINT_UNDEFINED
> >>>>>>       csrwr    a2,   KVM_TEMP_KS
> >>>>>>       csrrd    a2,   KVM_VCPU_KS
> >>>>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
> >>>>>>       /* restore per cpu register */
> >>>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
> >>>>>>       addi.d    sp, sp, -PT_SIZE
> >>>>>> +    UNWIND_HINT_REGS
> >>>>>>         /* Prepare handle exception */
> >>>>>>       or    a0, s0, zero
> >>>>>> @@ -200,7 +202,7 @@ ret_to_host:
> >>>>>>       jr      ra
> >>>>>>     SYM_CODE_END(kvm_exc_entry)
> >>>>>> -EXPORT_SYMBOL(kvm_exc_entry)
> >>>>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
> >>>>>>     /*
> >>>>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
> >>>>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
> >>>>>>       /* Save host GPRs */
> >>>>>>       kvm_save_host_gpr a2
> >>>>>>   +    /*
> >>>>>> +     * The csr_era member variable of the pt_regs structure is
> >>>>>> required
> >>>>>> +     * for unwinding orc to perform stack traceback, so we need
> >>>>>> to put
> >>>>>> +     * pc into csr_era member variable here.
> >>>>>> +     */
> >>>>>> +    pcaddi    t0, 0
> >>>>>> +    st.d    t0, a2, PT_ERA
> >>>>> Hi, Xianglai,
> >>>>>
> >>>>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` rather
> >>>>> than
> >>>>> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark
> >>>>> "C-likely"
> >>>>> asm functions.
> >>>>
> >>>> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the next
> >>>> version.
> >>>>
> >>>>> I guess the kvm_enter_guest is something like exception
> >>>>> handler because the last instruction is "ertn". So usually it should
> >>>>> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
> >>>>> However, all info is store to "$a2", this mark should be
> >>>>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
> >>>>> I don't know why the function-internal PC is saved here by `pcaddi t0,
> >>>>> 0`,
> >>>>> and I think it is no meaning(, for exception handler, they save
> >>>>> last PC
> >>>>> by read CSR.ERA). The `kvm_enter_guest` saves registers by
> >>>>> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
> >>>>> is enable. So I wonder if there is really a stacktrace through this
> >>>>> function?
> >>>>>
> >>>> The stack backtracking issue in switch.S is rather complex because
> >>>> it involves the switching between cpu root-mode and guest-mode:
> >>>> Real stack backtracking should be divided into two parts:
> >>>> part 1:
> >>>>     [<0>] kvm_enter_guest+0x38/0x11c
> >>>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
> >>>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
> >>>>     [<0>] sys_ioctl+0x498/0xf00
> >>>>     [<0>] do_syscall+0x98/0x1d0
> >>>>     [<0>] handle_syscall+0xb8/0x158
> >>>>
> >>>> part 2:
> >>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
> >>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
> >>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
> >>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
> >>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
> >>>>     [<0>] kvm_exc_entry+0x104/0x1e4
> >>>>
> >>>>
> >>>> In "part 1", after executing kvm_enter_guest, the cpu switches from
> >>>> root-mode to guest-mode.
> >>>> In this case, stack backtracking is indeed very rare.
> >>>>
> >>>> In "part 2", the cpu switches from the guest-mode to the root-mode,
> >>>> and most of the stack backtracking occurs during this phase.
> >>>>
> >>>> To obtain the longest call chain, we save pc in kvm_enter_guest to
> >>>> pt_regs.csr_era,
> >>>> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
> >>>> The ORC entry was re-established using "UNWIND_HINT_REGS",
> >>>>  and then we obtained the following stack backtrace as we wanted:
> >>>>
> >>>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
> >>>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
> >>>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
> >>>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
> >>>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
> >>>>     [<0>] kvm_exc_entry+0x104/0x1e4
> >>> I found this might be a coincidence—correct behavior due to the
> >>> incorrect
> >>> UNWIND_HINT_REGS mark and unusual stack adjustment.
> >>>
> >>> First, the kvm_enter_guest contains only a single branch instruction,
> >>> ertn.
> >>> It hardware-jump to the CSR.ERA address directly, jump into
> >>> kvm_exc_entry.
> >>>
> >>> At this point, the stack layout looks like this:
> >>> -------------------------------
> >>>   frame from call to `kvm_enter_guest`
> >>> -------------------------------  <- $sp
> >>>   PT_REGS
> >>> -------------------------------  <- $a2
> >>>
> >>> Then kvm_exc_entry adjust stack without save any register (e.g. $ra,
> >>> $sp)
> >>> but still marked UNWIND_HINT_REGS.
> >>> After the adjustment:
> >>> -------------------------------
> >>>   frame from call to `kvm_enter_guest`
> >>> -------------------------------
> >>>   PT_REGS
> >>> -------------------------------  <- $a2, new $sp
> >>>
> >>> During unwinding, when the unwinder reaches kvm_exc_entry,
> >>> it meets the mark of PT_REGS and correctly recovers
> >>>  pc = regs.csr_era, sp = regs.sp, ra = regs.ra
> >>>
> >> Yes, here unwinder does work as you say.
> >>
> >>> a) Can we avoid "ertn" and use `jr reg` (or `jirl ra, reg, 0`)
> >>> instead, like a call?
> >> No, we need to rely on the 'ertn' instruction to restore PIE to CRMD.IE,
> >> and at the same time to ensure that it is atomic;
> >> there is no instruction more appropriate here than 'ertn'.
> > You are right! I got it.
> >>
> >>> The kvm_exc_entry cannot return to kvm_enter_guest
> >>> if we use "ertn", so should the kvm_enter_guest appear on the
> >>> stacktrace?
> >>>
> >>
> >> It is flexible. As I mentioned above, the cpu completes the switch
> >> from host-mode to guest mode through kvm_enter_guest,
> >> and then the switch from guest mode to host-mode through
> >> kvm_exc_entry. When we ignore the details of the host-mode
> >> and guest-mode switching in the middle, we can understand that the
> >> host cpu has completed kvm_enter_guest->kvm_exc_entry.
> >> From this perspective, I think it can exist in the call stack, and at
> >> the same time, we have obtained the maximum call stack information.
> >>
> >>
> >>> b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
> >>> UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
> >>> like ret_from_kernel_thread_asm.
> >>>
> >> The following command can be used to dump the orc entries of the kernel:
> >> ./tools/objtool/objtool --dump vmlinux
> >>
> >> You can observe that not all orc entries are generated at the
> >> beginning of the function.
> >> For example:
> >> handle_tlb_protect
> >> ftrace_stub
> >> handle_reserved
> >>
> >> So it is unnecessary for us to modify UNWIND_HINT_REGS in order to
> >> place it at the beginning of the function.
> >>
> >> If you have a better solution, could you provide an example of the
> >> modification?
> >> I can test the feasibility of the solution.
> >>
> > My earlier wording "at the beginning of the function" was incorrect
> > (sorry about that).
> > It should be marked where all stacktrace info is available.
> > Thanks for all the explaining, since I'm unfamiliar with kvm, I need
> > these to help my understanding.
> >
> > Can you try with follows, with save regs by $sp, set more precise era to
> > pt_regs, and more unwind hint.
> >
> >
> > diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
> > index f1768b7a6194..8ed1d7b72c54 100644
> > --- a/arch/loongarch/kvm/switch.S
> > +++ b/arch/loongarch/kvm/switch.S
> > @@ -14,13 +14,13 @@
> >   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
> >
> >   .macro kvm_save_host_gpr base
> > -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
> > +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
> >       st.d    $r\n, \base, HGPR_OFFSET(\n)
> >       .endr
> >   .endm
> >
> >   .macro kvm_restore_host_gpr base
> > -    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
> > +    .irp n,1,2,22,23,24,25,26,27,28,29,30,31
> >       ld.d    $r\n, \base, HGPR_OFFSET(\n)
> >       .endr
> >   .endm
> > @@ -88,6 +88,7 @@
> >       /* Load KVM_ARCH register */
> >       ld.d    a2, a2,    (KVM_ARCH_GGPR + 8 * REG_A2)
> >
> > +111:
> >       ertn /* Switch to guest: GSTAT.PGM = 1, ERRCTL.ISERR = 0,
> > TLBRPRMD.ISTLBR = 0 */
> >   .endm
> >
> > @@ -158,9 +159,10 @@ SYM_CODE_START(kvm_exc_entry)
> >       csrwr    t0, LOONGARCH_CSR_GTLBC
> >       ld.d    tp, a2, KVM_ARCH_HTP
> >       ld.d    sp, a2, KVM_ARCH_HSP
> > +    UNWIND_HINT_REGS
> > +
> >       /* restore per cpu register */
> >       ld.d    u0, a2, KVM_ARCH_HPERCPU
> > -    addi.d    sp, sp, -PT_SIZE
> >
> >       /* Prepare handle exception */
> >       or    a0, s0, zero
> > @@ -184,10 +186,11 @@ SYM_CODE_START(kvm_exc_entry)
> >       csrwr    s1, KVM_VCPU_KS
> >       kvm_switch_to_guest
> >
> > +    UNWIND_HINT_UNDEFINED
> >   ret_to_host:
> > -    ld.d    a2, a2, KVM_ARCH_HSP
> > -    addi.d  a2, a2, -PT_SIZE
> > -    kvm_restore_host_gpr    a2
> > +    ld.d    sp, a2, KVM_ARCH_HSP
> > +    kvm_restore_host_gpr    sp
> > +    addi.d    sp, sp, PT_SIZE
> >       jr      ra
> >
> >   SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL)
> > @@ -200,11 +203,15 @@ SYM_CODE_END(kvm_exc_entry)
> >    *  a0: kvm_run* run
> >    *  a1: kvm_vcpu* vcpu
> >    */
> > -SYM_FUNC_START(kvm_enter_guest)
> > +SYM_CODE_START(kvm_enter_guest)
> > +    UNWIND_HINT_UNDEFINED
> >       /* Allocate space in stack bottom */
> > -    addi.d    a2, sp, -PT_SIZE
> > +    addi.d    sp, sp, -PT_SIZE
> >       /* Save host GPRs */
> > -    kvm_save_host_gpr a2
> > +    kvm_save_host_gpr sp
> > +    la.pcrel a2, 111f
> > +    st.d     a2, sp, PT_ERA
> > +    UNWIND_HINT_REGS
> >
> >       addi.d    a2, a1, KVM_VCPU_ARCH
> >       st.d    sp, a2, KVM_ARCH_HSP
> >
> wow, so quick and wonderful.
>
> Huacai,
>   the loongarch kernel and kvm maintainer, what is your point about this?
Xianglai's patch really has some problems. But Jinyang's solution is a
little big, can we minimize the changes? For example, don't touch
kvm_exc_entry, only modify kvm_enter_guest. In my opinion the current
problem has no relationship with kvm_exc_entry (but I'm not sure).

Huacai

>
> Regards
> Bibo Mao
> > Jinyang
> >
>
>
Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Bibo Mao 1 month, 1 week ago

On 2025/12/29 下午3:10, Jinyang He wrote:
> 
> On 2025-12-29 11:53, lixianglai wrote:
>> Hi Jinyang:
>>> On 2025-12-27 09:27, Xianglai Li wrote:
>>>
>>>> Insert the appropriate UNWIND macro definition into the 
>>>> kvm_exc_entry in
>>>> the assembly function to guide the generation of correct ORC table 
>>>> entries,
>>>> thereby solving the timeout problem of loading the livepatch-sample 
>>>> module
>>>> on a physical machine running multiple vcpus virtual machines.
>>>>
>>>> While solving the above problems, we have gained an additional benefit,
>>>> that is, we can obtain more call stack information
>>>>
>>>> Stack information that can be obtained before the problem is fixed:
>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>> [<0>] kvm_exc_entry+0x100/0x1e0
>>>>
>>>> Stack information that can be obtained after the problem is fixed:
>>>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>>> [<0>] kvm_exc_entry+0x104/0x1e4
>>>> [<0>] kvm_enter_guest+0x38/0x11c
>>>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>>> [<0>] sys_ioctl+0x498/0xf00
>>>> [<0>] do_syscall+0x98/0x1d0
>>>> [<0>] handle_syscall+0xb8/0x158
>>>>
>>>> Cc: stable@vger.kernel.org
>>>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>>>> ---
>>>> Cc: Huacai Chen <chenhuacai@kernel.org>
>>>> Cc: WANG Xuerui <kernel@xen0n.name>
>>>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>>>> Cc: Bibo Mao <maobibo@loongson.cn>
>>>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>>>> Cc: Xianglai Li <lixianglai@loongson.cn>
>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>>>
>>>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>>>> index 93845ce53651..a3ea9567dbe5 100644
>>>> --- a/arch/loongarch/kvm/switch.S
>>>> +++ b/arch/loongarch/kvm/switch.S
>>>> @@ -10,6 +10,7 @@
>>>>   #include <asm/loongarch.h>
>>>>   #include <asm/regdef.h>
>>>>   #include <asm/unwind_hints.h>
>>>> +#include <linux/kvm_types.h>
>>>>     #define HGPR_OFFSET(x)        (PT_R0 + 8*x)
>>>>   #define GGPR_OFFSET(x)        (KVM_ARCH_GGPR + 8*x)
>>>> @@ -110,9 +111,9 @@
>>>>        * need to copy world switch code to DMW area.
>>>>        */
>>>>       .text
>>>> +    .p2align PAGE_SHIFT
>>>>       .cfi_sections    .debug_frame
>>>>   SYM_CODE_START(kvm_exc_entry)
>>>> -    .p2align PAGE_SHIFT
>>>>       UNWIND_HINT_UNDEFINED
>>>>       csrwr    a2,   KVM_TEMP_KS
>>>>       csrrd    a2,   KVM_VCPU_KS
>>>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>>>       /* restore per cpu register */
>>>>       ld.d    u0, a2, KVM_ARCH_HPERCPU
>>>>       addi.d    sp, sp, -PT_SIZE
>>>> +    UNWIND_HINT_REGS
>>>>         /* Prepare handle exception */
>>>>       or    a0, s0, zero
>>>> @@ -200,7 +202,7 @@ ret_to_host:
>>>>       jr      ra
>>>>     SYM_CODE_END(kvm_exc_entry)
>>>> -EXPORT_SYMBOL(kvm_exc_entry)
>>>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
>>>>     /*
>>>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
>>>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>>>       /* Save host GPRs */
>>>>       kvm_save_host_gpr a2
>>>>   +    /*
>>>> +     * The csr_era member variable of the pt_regs structure is 
>>>> required
>>>> +     * for unwinding orc to perform stack traceback, so we need to put
>>>> +     * pc into csr_era member variable here.
>>>> +     */
>>>> +    pcaddi    t0, 0
>>>> +    st.d    t0, a2, PT_ERA
>>> Hi, Xianglai,
>>>
>>> It should use `SYM_CODE_START` to mark the `kvm_enter_guest` rather than
>>> `SYM_FUNC_START`, since the `SYM_FUNC_START` is used to mark "C-like"
>>> asm functions. 
>>
>> Ok, I will use SYM_CODE_START to mark kvm_enter_guest in the next 
>> version.
>>
>>> I guess the kvm_enter_guest is something like an exception
>>> handler because the last instruction is "ertn". So usually it should
>>> mark UNWIND_HINT_REGS where can find last frame info by "$sp".
>>> However, all info is store to "$a2", this mark should be
>>>   `UNWIND_HINT sp_reg=ORC_REG_A2(???) type=UNWIND_HINT_TYPE_REGS`.
>>> I don't know why this function's internal PC is saved here by `pcaddi t0, 0`,
>>> and I think it has no meaning (for an exception handler, the last PC is
>>> saved by reading CSR.ERA). The `kvm_enter_guest` saves registers by
>>> "$a2"("$sp" - PT_REGS) beyond stack ("$sp"), it is dangerous if IE
>>> is enable. So I wonder if there is really a stacktrace through this 
>>> function?
>>>
>> The stack backtracking issue in switch.S is rather complex because it 
>> involves the switching between cpu root-mode and guest-mode:
>> Real stack backtracking should be divided into two parts:
>> part 1:
>>     [<0>] kvm_enter_guest+0x38/0x11c
>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>     [<0>] sys_ioctl+0x498/0xf00
>>     [<0>] do_syscall+0x98/0x1d0
>>     [<0>] handle_syscall+0xb8/0x158
>>
>> part 2:
>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>     [<0>] kvm_exc_entry+0x104/0x1e4
>>
>>
>> In "part 1", after executing kvm_enter_guest, the cpu switches from 
>> root-mode to guest-mode.
>> In this case, stack backtracking is indeed very rare.
>>
>> In "part 2", the cpu switches from the guest-mode to the root-mode,
>> and most of the stack backtracking occurs during this phase.
>>
>> To obtain the longest call chain, we save pc in kvm_enter_guest to 
>> pt_regs.csr_era,
>> and after restoring the sp of the root-mode cpu in kvm_exc_entry,
>> The ORC entry was re-established using "UNWIND_HINT_REGS",
>>  and then we obtained the following stack backtrace as we wanted:
>>
>>     [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>>     [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>>     [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>>     [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>>     [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>>     [<0>] kvm_exc_entry+0x104/0x1e4
> I found this might be a coincidence—correct behavior due to the incorrect
> UNWIND_HINT_REGS mark and unusual stack adjustment.
> 
> First, the kvm_enter_guest contains only a single branch instruction, ertn.
> It hardware-jump to the CSR.ERA address directly, jump into kvm_exc_entry.
> 
> At this point, the stack layout looks like this:
> -------------------------------
>    frame from call to `kvm_enter_guest`
> -------------------------------  <- $sp
>    PT_REGS
> -------------------------------  <- $a2
> 
> Then kvm_exc_entry adjust stack without save any register (e.g. $ra, $sp)
> but still marked UNWIND_HINT_REGS.
> After the adjustment:
> -------------------------------
>    frame from call to `kvm_enter_guest`
> -------------------------------
>    PT_REGS
> -------------------------------  <- $a2, new $sp
> 
> During unwinding, when the unwinder reaches kvm_exc_entry,
> it meets the mark of PT_REGS and correctly recovers
>   pc = regs.csr_era, sp = regs.sp, ra = regs.ra
> 
> a) Can we avoid "ertn" and use `jr reg` (or `jirl ra, reg, 0`)
> instead, like a call? The kvm_exc_entry cannot return to kvm_enter_guest
> if we use "ertn", so should the kvm_enter_guest appear on the stacktrace?
> 
> b) Can we adjust $sp before entering kvm_exc_entry? Then we can mark
> UNWIND_HINT_REGS at the beginning of kvm_exc_entry, which something
> like ret_from_kernel_thread_asm.
Talk is cheap. Could you show your code?

Regards
Bibo Mao
> 
>> [<0>] kvm_enter_guest+0x38/0x11c
>>     [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>>     [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>>     [<0>] sys_ioctl+0x498/0xf00
>>     [<0>] do_syscall+0x98/0x1d0
>>     [<0>] handle_syscall+0xb8/0x158
>>
>> Doing so is equivalent to ignoring the details of the cpu root-mode 
>> and guest-mode switching.
>> About what you said in the IE enable phase is dangerous,
>> interrupts are always off during the cpu root-mode and guest-mode 
>> switching in kvm_enter_guest and kvm_exc_entry.
>>
> 

Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by Huacai Chen 1 month, 2 weeks ago
Hi, Xianglai,

On Sat, Dec 27, 2025 at 9:52 AM Xianglai Li <lixianglai@loongson.cn> wrote:
>
> Insert the appropriate UNWIND macro definition into the kvm_exc_entry in
> the assembly function to guide the generation of correct ORC table entries,
> thereby solving the timeout problem of loading the livepatch-sample module
> on a physical machine running multiple vcpus virtual machines.
>
> While solving the above problems, we have gained an additional benefit,
> that is, we can obtain more call stack information
>
> Stack information that can be obtained before the problem is fixed:
> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
> [<0>] kvm_exc_entry+0x100/0x1e0
>
> Stack information that can be obtained after the problem is fixed:
> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
> [<0>] kvm_exc_entry+0x104/0x1e4
> [<0>] kvm_enter_guest+0x38/0x11c
> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
> [<0>] sys_ioctl+0x498/0xf00
> [<0>] do_syscall+0x98/0x1d0
> [<0>] handle_syscall+0xb8/0x158
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
> ---
> Cc: Huacai Chen <chenhuacai@kernel.org>
> Cc: WANG Xuerui <kernel@xen0n.name>
> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
> Cc: Bibo Mao <maobibo@loongson.cn>
> Cc: Charlie Jenkins <charlie@rivosinc.com>
> Cc: Xianglai Li <lixianglai@loongson.cn>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>
>  arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>  1 file changed, 19 insertions(+), 9 deletions(-)
>
> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
> index 93845ce53651..a3ea9567dbe5 100644
> --- a/arch/loongarch/kvm/switch.S
> +++ b/arch/loongarch/kvm/switch.S
> @@ -10,6 +10,7 @@
>  #include <asm/loongarch.h>
>  #include <asm/regdef.h>
>  #include <asm/unwind_hints.h>
> +#include <linux/kvm_types.h>
>
>  #define HGPR_OFFSET(x)         (PT_R0 + 8*x)
>  #define GGPR_OFFSET(x)         (KVM_ARCH_GGPR + 8*x)
> @@ -110,9 +111,9 @@
>          * need to copy world switch code to DMW area.
>          */
>         .text
> +       .p2align PAGE_SHIFT
>         .cfi_sections   .debug_frame
>  SYM_CODE_START(kvm_exc_entry)
> -       .p2align PAGE_SHIFT
>         UNWIND_HINT_UNDEFINED
>         csrwr   a2,   KVM_TEMP_KS
>         csrrd   a2,   KVM_VCPU_KS
> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>         /* restore per cpu register */
>         ld.d    u0, a2, KVM_ARCH_HPERCPU
>         addi.d  sp, sp, -PT_SIZE
> +       UNWIND_HINT_REGS
>
>         /* Prepare handle exception */
>         or      a0, s0, zero
> @@ -200,7 +202,7 @@ ret_to_host:
>         jr      ra
>
>  SYM_CODE_END(kvm_exc_entry)
> -EXPORT_SYMBOL(kvm_exc_entry)
> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
Why not use EXPORT_SYMBOL_FOR_KVM in the first patch directly?

>
>  /*
>   * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>         /* Save host GPRs */
>         kvm_save_host_gpr a2
>
> +       /*
> +        * The csr_era member variable of the pt_regs structure is required
> +        * for unwinding orc to perform stack traceback, so we need to put
> +        * pc into csr_era member variable here.
> +        */
> +       pcaddi  t0, 0
> +       st.d    t0, a2, PT_ERA
I am still confused here, does this overwrite PT_ERA stored by
kvm_save_host_gpr?

Huacai

> +
>         addi.d  a2, a1, KVM_VCPU_ARCH
>         st.d    sp, a2, KVM_ARCH_HSP
>         st.d    tp, a2, KVM_ARCH_HTP
> @@ -225,7 +235,7 @@ SYM_FUNC_START(kvm_enter_guest)
>         csrwr   a1, KVM_VCPU_KS
>         kvm_switch_to_guest
>  SYM_FUNC_END(kvm_enter_guest)
> -EXPORT_SYMBOL(kvm_enter_guest)
> +EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest)
>
>  SYM_FUNC_START(kvm_save_fpu)
>         fpu_save_csr    a0 t1
> @@ -233,7 +243,7 @@ SYM_FUNC_START(kvm_save_fpu)
>         fpu_save_cc     a0 t1 t2
>         jr              ra
>  SYM_FUNC_END(kvm_save_fpu)
> -EXPORT_SYMBOL(kvm_save_fpu)
> +EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu)
>
>  SYM_FUNC_START(kvm_restore_fpu)
>         fpu_restore_double a0 t1
> @@ -241,7 +251,7 @@ SYM_FUNC_START(kvm_restore_fpu)
>         fpu_restore_cc     a0 t1 t2
>         jr                 ra
>  SYM_FUNC_END(kvm_restore_fpu)
> -EXPORT_SYMBOL(kvm_restore_fpu)
> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu)
>
>  #ifdef CONFIG_CPU_HAS_LSX
>  SYM_FUNC_START(kvm_save_lsx)
> @@ -250,7 +260,7 @@ SYM_FUNC_START(kvm_save_lsx)
>         lsx_save_data   a0 t1
>         jr              ra
>  SYM_FUNC_END(kvm_save_lsx)
> -EXPORT_SYMBOL(kvm_save_lsx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx)
>
>  SYM_FUNC_START(kvm_restore_lsx)
>         lsx_restore_data a0 t1
> @@ -258,7 +268,7 @@ SYM_FUNC_START(kvm_restore_lsx)
>         fpu_restore_csr  a0 t1 t2
>         jr               ra
>  SYM_FUNC_END(kvm_restore_lsx)
> -EXPORT_SYMBOL(kvm_restore_lsx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx)
>  #endif
>
>  #ifdef CONFIG_CPU_HAS_LASX
> @@ -268,7 +278,7 @@ SYM_FUNC_START(kvm_save_lasx)
>         lasx_save_data  a0 t1
>         jr              ra
>  SYM_FUNC_END(kvm_save_lasx)
> -EXPORT_SYMBOL(kvm_save_lasx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx)
>
>  SYM_FUNC_START(kvm_restore_lasx)
>         lasx_restore_data a0 t1
> @@ -276,7 +286,7 @@ SYM_FUNC_START(kvm_restore_lasx)
>         fpu_restore_csr   a0 t1 t2
>         jr                ra
>  SYM_FUNC_END(kvm_restore_lasx)
> -EXPORT_SYMBOL(kvm_restore_lasx)
> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx)
>  #endif
>
>  #ifdef CONFIG_CPU_HAS_LBT
> --
> 2.39.1
>
Re: [PATCH V3 2/2] LoongArch: KVM: fix "unreliable stack" issue
Posted by lixianglai 1 month, 1 week ago
Hi Huacai:
> Hi, Xianglai,
>
> On Sat, Dec 27, 2025 at 9:52 AM Xianglai Li <lixianglai@loongson.cn> wrote:
>> Insert the appropriate UNWIND macro definition into the kvm_exc_entry in
>> the assembly function to guide the generation of correct ORC table entries,
>> thereby solving the timeout problem of loading the livepatch-sample module
>> on a physical machine running multiple vcpus virtual machines.
>>
>> While solving the above problems, we have gained an additional benefit,
>> that is, we can obtain more call stack information
>>
>> Stack information that can be obtained before the problem is fixed:
>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>> [<0>] kvm_exc_entry+0x100/0x1e0
>>
>> Stack information that can be obtained after the problem is fixed:
>> [<0>] kvm_vcpu_block+0x88/0x120 [kvm]
>> [<0>] kvm_vcpu_halt+0x68/0x580 [kvm]
>> [<0>] kvm_emu_idle+0xd4/0xf0 [kvm]
>> [<0>] kvm_handle_gspr+0x7c/0x700 [kvm]
>> [<0>] kvm_handle_exit+0x160/0x270 [kvm]
>> [<0>] kvm_exc_entry+0x104/0x1e4
>> [<0>] kvm_enter_guest+0x38/0x11c
>> [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm]
>> [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm]
>> [<0>] sys_ioctl+0x498/0xf00
>> [<0>] do_syscall+0x98/0x1d0
>> [<0>] handle_syscall+0xb8/0x158
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Xianglai Li <lixianglai@loongson.cn>
>> ---
>> Cc: Huacai Chen <chenhuacai@kernel.org>
>> Cc: WANG Xuerui <kernel@xen0n.name>
>> Cc: Tianrui Zhao <zhaotianrui@loongson.cn>
>> Cc: Bibo Mao <maobibo@loongson.cn>
>> Cc: Charlie Jenkins <charlie@rivosinc.com>
>> Cc: Xianglai Li <lixianglai@loongson.cn>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
>>
>>   arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++---------
>>   1 file changed, 19 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>> index 93845ce53651..a3ea9567dbe5 100644
>> --- a/arch/loongarch/kvm/switch.S
>> +++ b/arch/loongarch/kvm/switch.S
>> @@ -10,6 +10,7 @@
>>   #include <asm/loongarch.h>
>>   #include <asm/regdef.h>
>>   #include <asm/unwind_hints.h>
>> +#include <linux/kvm_types.h>
>>
>>   #define HGPR_OFFSET(x)         (PT_R0 + 8*x)
>>   #define GGPR_OFFSET(x)         (KVM_ARCH_GGPR + 8*x)
>> @@ -110,9 +111,9 @@
>>           * need to copy world switch code to DMW area.
>>           */
>>          .text
>> +       .p2align PAGE_SHIFT
>>          .cfi_sections   .debug_frame
>>   SYM_CODE_START(kvm_exc_entry)
>> -       .p2align PAGE_SHIFT
>>          UNWIND_HINT_UNDEFINED
>>          csrwr   a2,   KVM_TEMP_KS
>>          csrrd   a2,   KVM_VCPU_KS
>> @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry)
>>          /* restore per cpu register */
>>          ld.d    u0, a2, KVM_ARCH_HPERCPU
>>          addi.d  sp, sp, -PT_SIZE
>> +       UNWIND_HINT_REGS
>>
>>          /* Prepare handle exception */
>>          or      a0, s0, zero
>> @@ -200,7 +202,7 @@ ret_to_host:
>>          jr      ra
>>
>>   SYM_CODE_END(kvm_exc_entry)
>> -EXPORT_SYMBOL(kvm_exc_entry)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry)
> Why not use EXPORT_SYMBOL_FOR_KVM in the first patch directly?
Ok,I will put it in the first patch.

>>   /*
>>    * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
>> @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest)
>>          /* Save host GPRs */
>>          kvm_save_host_gpr a2
>>
>> +       /*
>> +        * The csr_era member variable of the pt_regs structure is required
>> +        * for unwinding orc to perform stack traceback, so we need to put
>> +        * pc into csr_era member variable here.
>> +        */
>> +       pcaddi  t0, 0
>> +       st.d    t0, a2, PT_ERA
> I am still confused here, does this overwrite PT_ERA stored by
> kvm_save_host_gpr?

No, kvm_save_host_gpr does not write PT_ERA at all. In fact,
it is precisely because PT_ERA is not assigned a value that we need to
add these statements here.

Now the a2 register points to the structure pt_regs. Here, we are 
assigning a value to the pt_regs.csr_era member variable.
Then the function unwind_next_frame uses the pt_regs.csr_era member 
variable as a pc to do the stack traceback.

Thanks,
Xianglai.

> Huacai
>
>> +
>>          addi.d  a2, a1, KVM_VCPU_ARCH
>>          st.d    sp, a2, KVM_ARCH_HSP
>>          st.d    tp, a2, KVM_ARCH_HTP
>> @@ -225,7 +235,7 @@ SYM_FUNC_START(kvm_enter_guest)
>>          csrwr   a1, KVM_VCPU_KS
>>          kvm_switch_to_guest
>>   SYM_FUNC_END(kvm_enter_guest)
>> -EXPORT_SYMBOL(kvm_enter_guest)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest)
>>
>>   SYM_FUNC_START(kvm_save_fpu)
>>          fpu_save_csr    a0 t1
>> @@ -233,7 +243,7 @@ SYM_FUNC_START(kvm_save_fpu)
>>          fpu_save_cc     a0 t1 t2
>>          jr              ra
>>   SYM_FUNC_END(kvm_save_fpu)
>> -EXPORT_SYMBOL(kvm_save_fpu)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu)
>>
>>   SYM_FUNC_START(kvm_restore_fpu)
>>          fpu_restore_double a0 t1
>> @@ -241,7 +251,7 @@ SYM_FUNC_START(kvm_restore_fpu)
>>          fpu_restore_cc     a0 t1 t2
>>          jr                 ra
>>   SYM_FUNC_END(kvm_restore_fpu)
>> -EXPORT_SYMBOL(kvm_restore_fpu)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu)
>>
>>   #ifdef CONFIG_CPU_HAS_LSX
>>   SYM_FUNC_START(kvm_save_lsx)
>> @@ -250,7 +260,7 @@ SYM_FUNC_START(kvm_save_lsx)
>>          lsx_save_data   a0 t1
>>          jr              ra
>>   SYM_FUNC_END(kvm_save_lsx)
>> -EXPORT_SYMBOL(kvm_save_lsx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx)
>>
>>   SYM_FUNC_START(kvm_restore_lsx)
>>          lsx_restore_data a0 t1
>> @@ -258,7 +268,7 @@ SYM_FUNC_START(kvm_restore_lsx)
>>          fpu_restore_csr  a0 t1 t2
>>          jr               ra
>>   SYM_FUNC_END(kvm_restore_lsx)
>> -EXPORT_SYMBOL(kvm_restore_lsx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx)
>>   #endif
>>
>>   #ifdef CONFIG_CPU_HAS_LASX
>> @@ -268,7 +278,7 @@ SYM_FUNC_START(kvm_save_lasx)
>>          lasx_save_data  a0 t1
>>          jr              ra
>>   SYM_FUNC_END(kvm_save_lasx)
>> -EXPORT_SYMBOL(kvm_save_lasx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx)
>>
>>   SYM_FUNC_START(kvm_restore_lasx)
>>          lasx_restore_data a0 t1
>> @@ -276,7 +286,7 @@ SYM_FUNC_START(kvm_restore_lasx)
>>          fpu_restore_csr   a0 t1 t2
>>          jr                ra
>>   SYM_FUNC_END(kvm_restore_lasx)
>> -EXPORT_SYMBOL(kvm_restore_lasx)
>> +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx)
>>   #endif
>>
>>   #ifdef CONFIG_CPU_HAS_LBT
>> --
>> 2.39.1
>>