From: Xi Ruoyao <xry111@xry111.site>
Use a trampoline as the exception handler, which can eliminate some uses of
la.abs in preparation for the subsequent support of the PIE kernel.
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Youling Tang <tangyouling@loongson.cn>
---
arch/loongarch/include/asm/stackframe.h | 8 +++----
arch/loongarch/include/asm/uaccess.h | 1 -
arch/loongarch/kernel/entry.S | 6 +++---
arch/loongarch/kernel/genex.S | 20 +++++++++---------
arch/loongarch/kernel/head.S | 2 +-
arch/loongarch/kernel/traps.c | 4 +++-
arch/loongarch/mm/tlbex.S | 28 +++++++------------------
7 files changed, 29 insertions(+), 40 deletions(-)
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 4ca953062b5b..96c94035b5d0 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -76,8 +76,8 @@
* kernelsp array for it. It stores the current sp in t0 and loads the
* new value in sp.
*/
- .macro get_saved_sp docfi=0
- la.abs t1, kernelsp
+ .macro get_saved_sp docfi=0
+ la.pcrel t1, kernelsp
#ifdef CONFIG_SMP
csrrd t0, PERCPU_BASE_KS
LONG_ADD t1, t1, t0
@@ -89,8 +89,8 @@
LONG_L sp, t1, 0
.endm
- .macro set_saved_sp stackp temp temp2
- la.abs \temp, kernelsp
+ .macro set_saved_sp stackp temp temp2
+ la.pcrel \temp, kernelsp
#ifdef CONFIG_SMP
LONG_ADD \temp, \temp, u0
#endif
diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
index 255899d4a7c3..0d22991ae430 100644
--- a/arch/loongarch/include/asm/uaccess.h
+++ b/arch/loongarch/include/asm/uaccess.h
@@ -22,7 +22,6 @@
extern u64 __ua_limit;
#define __UA_ADDR ".dword"
-#define __UA_LA "la.abs"
#define __UA_LIMIT __ua_limit
/*
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index d53b631c9022..ca01afdbec3f 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -18,9 +18,9 @@
.text
.cfi_sections .debug_frame
.align 5
-SYM_FUNC_START(handle_syscall)
+SYM_FUNC_START(handle_sys)
csrrd t0, PERCPU_BASE_KS
- la.abs t1, kernelsp
+ la.pcrel t1, kernelsp
add.d t1, t1, t0
move t2, sp
ld.d sp, t1, 0
@@ -66,7 +66,7 @@ SYM_FUNC_START(handle_syscall)
bl do_syscall
RESTORE_ALL_AND_RET
-SYM_FUNC_END(handle_syscall)
+SYM_FUNC_END(handle_sys)
SYM_CODE_START(ret_from_fork)
bl schedule_tail # a0 = struct task_struct *prev
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 75e5be807a0d..d3df0fa725a2 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -32,9 +32,8 @@ SYM_FUNC_START(__arch_cpu_idle)
SYM_FUNC_END(__arch_cpu_idle)
SYM_FUNC_START(handle_vint)
- BACKUP_T0T1
SAVE_ALL
- la.abs t1, __arch_cpu_idle
+ la.pcrel t1, __arch_cpu_idle
LONG_L t0, sp, PT_ERA
/* 32 byte rollback region */
ori t0, t0, 0x1f
@@ -43,8 +42,7 @@ SYM_FUNC_START(handle_vint)
LONG_S t0, sp, PT_ERA
1: move a0, sp
move a1, sp
- la.abs t0, do_vint
- jirl ra, t0, 0
+ bl do_vint
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_vint)
@@ -67,12 +65,10 @@ SYM_FUNC_END(except_vec_cex)
.macro BUILD_HANDLER exception handler prep
.align 5
SYM_FUNC_START(handle_\exception)
- BACKUP_T0T1
SAVE_ALL
build_prep_\prep
move a0, sp
- la.abs t0, do_\handler
- jirl ra, t0, 0
+ bl do_\handler
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_\exception)
.endm
@@ -89,7 +85,11 @@ SYM_FUNC_END(except_vec_cex)
BUILD_HANDLER watch watch none
BUILD_HANDLER reserved reserved none /* others */
-SYM_FUNC_START(handle_sys)
- la.abs t0, handle_syscall
+SYM_FUNC_START(handler_trampoline)
+ csrwr t0, EXCEPTION_KS0
+ csrwr t1, EXCEPTION_KS1
+ pcaddi t0, 0
+ ld.d t0, t0, 16
jr t0
-SYM_FUNC_END(handle_sys)
+ nop
+SYM_FUNC_END(handler_trampoline)
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 57bada6b4e93..aa6181714ec3 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -117,7 +117,7 @@ SYM_CODE_START(smpboot_entry)
li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0
csrwr t0, LOONGARCH_CSR_EUEN
- la.abs t0, cpuboot_data
+ la.pcrel t0, cpuboot_data
ld.d sp, t0, CPU_BOOT_STACK
ld.d tp, t0, CPU_BOOT_TINFO
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 7ea62faeeadb..0e8faaca3679 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -61,6 +61,7 @@ extern asmlinkage void handle_lasx(void);
extern asmlinkage void handle_reserved(void);
extern asmlinkage void handle_watch(void);
extern asmlinkage void handle_vint(void);
+extern asmlinkage void handler_trampoline(void);
static void show_backtrace(struct task_struct *task, const struct pt_regs *regs,
const char *loglvl, bool user)
@@ -716,7 +717,8 @@ void per_cpu_trap_init(int cpu)
/* Install CPU exception handler */
void set_handler(unsigned long offset, void *addr, unsigned long size)
{
- memcpy((void *)(eentry + offset), addr, size);
+ memcpy((void *)(eentry + offset), &handler_trampoline, 24);
+ memcpy((void *)(eentry + offset + 24), &addr, 8);
local_flush_icache_range(eentry + offset, eentry + offset + size);
}
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index 58781c6e4191..cfaacdac518c 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -24,8 +24,7 @@
move a0, sp
REG_S a2, sp, PT_BVADDR
li.w a1, \write
- la.abs t0, do_page_fault
- jirl ra, t0, 0
+ bl do_page_fault
RESTORE_ALL_AND_RET
SYM_FUNC_END(tlb_do_page_fault_\write)
.endm
@@ -34,20 +33,16 @@
tlb_do_page_fault 1
SYM_FUNC_START(handle_tlb_protect)
- BACKUP_T0T1
SAVE_ALL
move a0, sp
move a1, zero
csrrd a2, LOONGARCH_CSR_BADV
REG_S a2, sp, PT_BVADDR
- la.abs t0, do_page_fault
- jirl ra, t0, 0
+ bl do_page_fault
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_tlb_protect)
SYM_FUNC_START(handle_tlb_load)
- csrwr t0, EXCEPTION_KS0
- csrwr t1, EXCEPTION_KS1
csrwr ra, EXCEPTION_KS2
/*
@@ -116,7 +111,7 @@ smp_pgtable_change_load:
#ifdef CONFIG_64BIT
vmalloc_load:
- la.abs t1, swapper_pg_dir
+ la.pcrel t1, swapper_pg_dir
b vmalloc_done_load
#endif
@@ -187,13 +182,10 @@ tlb_huge_update_load:
nopage_tlb_load:
dbar 0
csrrd ra, EXCEPTION_KS2
- la.abs t0, tlb_do_page_fault_0
- jr t0
+ b tlb_do_page_fault_0
SYM_FUNC_END(handle_tlb_load)
SYM_FUNC_START(handle_tlb_store)
- csrwr t0, EXCEPTION_KS0
- csrwr t1, EXCEPTION_KS1
csrwr ra, EXCEPTION_KS2
/*
@@ -263,7 +255,7 @@ smp_pgtable_change_store:
#ifdef CONFIG_64BIT
vmalloc_store:
- la.abs t1, swapper_pg_dir
+ la.pcrel t1, swapper_pg_dir
b vmalloc_done_store
#endif
@@ -336,13 +328,10 @@ tlb_huge_update_store:
nopage_tlb_store:
dbar 0
csrrd ra, EXCEPTION_KS2
- la.abs t0, tlb_do_page_fault_1
- jr t0
+ b tlb_do_page_fault_1
SYM_FUNC_END(handle_tlb_store)
SYM_FUNC_START(handle_tlb_modify)
- csrwr t0, EXCEPTION_KS0
- csrwr t1, EXCEPTION_KS1
csrwr ra, EXCEPTION_KS2
/*
@@ -411,7 +400,7 @@ smp_pgtable_change_modify:
#ifdef CONFIG_64BIT
vmalloc_modify:
- la.abs t1, swapper_pg_dir
+ la.pcrel t1, swapper_pg_dir
b vmalloc_done_modify
#endif
@@ -483,8 +472,7 @@ tlb_huge_update_modify:
nopage_tlb_modify:
dbar 0
csrrd ra, EXCEPTION_KS2
- la.abs t0, tlb_do_page_fault_1
- jr t0
+ b tlb_do_page_fault_1
SYM_FUNC_END(handle_tlb_modify)
SYM_FUNC_START(handle_tlb_refill)
--
2.37.1
Hi, Ruoyao and Youling, I care about the performance when NUMA enabled. We set CSR.EENTRY for each possible cpus where is NUMA-relative. So, I guess the more codes in NUMA-relative memory makes more performance. If we just set handler_trampoline as exception handler, the performance may be influenced. Thanks, Jinyang On 2023-01-09 17:07, Youling Tang wrote: > From: Xi Ruoyao <xry111@xry111.site> > > Use a trampoline as an exception handlers, which can kill some use of > la.abs in preparation for the subsequent support of the PIE kernel. > > Signed-off-by: Xi Ruoyao <xry111@xry111.site> > Signed-off-by: Youling Tang <tangyouling@loongson.cn> > --- > arch/loongarch/include/asm/stackframe.h | 8 +++---- > arch/loongarch/include/asm/uaccess.h | 1 - > arch/loongarch/kernel/entry.S | 6 +++--- > arch/loongarch/kernel/genex.S | 20 +++++++++--------- > arch/loongarch/kernel/head.S | 2 +- > arch/loongarch/kernel/traps.c | 4 +++- > arch/loongarch/mm/tlbex.S | 28 +++++++------------------ > 7 files changed, 29 insertions(+), 40 deletions(-) > > diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h > index 4ca953062b5b..96c94035b5d0 100644 > --- a/arch/loongarch/include/asm/stackframe.h > +++ b/arch/loongarch/include/asm/stackframe.h > @@ -76,8 +76,8 @@ > * kernelsp array for it. It stores the current sp in t0 and loads the > * new value in sp. 
> */ > - .macro get_saved_sp docfi=0 > - la.abs t1, kernelsp > + .macro get_saved_sp docfi=0 > + la.pcrel t1, kernelsp > #ifdef CONFIG_SMP > csrrd t0, PERCPU_BASE_KS > LONG_ADD t1, t1, t0 > @@ -89,8 +89,8 @@ > LONG_L sp, t1, 0 > .endm > > - .macro set_saved_sp stackp temp temp2 > - la.abs \temp, kernelsp > + .macro set_saved_sp stackp temp temp2 > + la.pcrel \temp, kernelsp > #ifdef CONFIG_SMP > LONG_ADD \temp, \temp, u0 > #endif > diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h > index 255899d4a7c3..0d22991ae430 100644 > --- a/arch/loongarch/include/asm/uaccess.h > +++ b/arch/loongarch/include/asm/uaccess.h > @@ -22,7 +22,6 @@ > extern u64 __ua_limit; > > #define __UA_ADDR ".dword" > -#define __UA_LA "la.abs" > #define __UA_LIMIT __ua_limit > > /* > diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S > index d53b631c9022..ca01afdbec3f 100644 > --- a/arch/loongarch/kernel/entry.S > +++ b/arch/loongarch/kernel/entry.S > @@ -18,9 +18,9 @@ > .text > .cfi_sections .debug_frame > .align 5 > -SYM_FUNC_START(handle_syscall) > +SYM_FUNC_START(handle_sys) > csrrd t0, PERCPU_BASE_KS > - la.abs t1, kernelsp > + la.pcrel t1, kernelsp > add.d t1, t1, t0 > move t2, sp > ld.d sp, t1, 0 > @@ -66,7 +66,7 @@ SYM_FUNC_START(handle_syscall) > bl do_syscall > > RESTORE_ALL_AND_RET > -SYM_FUNC_END(handle_syscall) > +SYM_FUNC_END(handle_sys) > > SYM_CODE_START(ret_from_fork) > bl schedule_tail # a0 = struct task_struct *prev > diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S > index 75e5be807a0d..d3df0fa725a2 100644 > --- a/arch/loongarch/kernel/genex.S > +++ b/arch/loongarch/kernel/genex.S > @@ -32,9 +32,8 @@ SYM_FUNC_START(__arch_cpu_idle) > SYM_FUNC_END(__arch_cpu_idle) > > SYM_FUNC_START(handle_vint) > - BACKUP_T0T1 > SAVE_ALL > - la.abs t1, __arch_cpu_idle > + la.pcrel t1, __arch_cpu_idle > LONG_L t0, sp, PT_ERA > /* 32 byte rollback region */ > ori t0, t0, 0x1f > @@ -43,8 +42,7 @@ 
SYM_FUNC_START(handle_vint) > LONG_S t0, sp, PT_ERA > 1: move a0, sp > move a1, sp > - la.abs t0, do_vint > - jirl ra, t0, 0 > + bl do_vint > RESTORE_ALL_AND_RET > SYM_FUNC_END(handle_vint) > > @@ -67,12 +65,10 @@ SYM_FUNC_END(except_vec_cex) > .macro BUILD_HANDLER exception handler prep > .align 5 > SYM_FUNC_START(handle_\exception) > - BACKUP_T0T1 > SAVE_ALL > build_prep_\prep > move a0, sp > - la.abs t0, do_\handler > - jirl ra, t0, 0 > + bl do_\handler > RESTORE_ALL_AND_RET > SYM_FUNC_END(handle_\exception) > .endm > @@ -89,7 +85,11 @@ SYM_FUNC_END(except_vec_cex) > BUILD_HANDLER watch watch none > BUILD_HANDLER reserved reserved none /* others */ > > -SYM_FUNC_START(handle_sys) > - la.abs t0, handle_syscall > +SYM_FUNC_START(handler_trampoline) > + csrwr t0, EXCEPTION_KS0 > + csrwr t1, EXCEPTION_KS1 > + pcaddi t0, 0 > + ld.d t0, t0, 16 > jr t0 > -SYM_FUNC_END(handle_sys) > + nop > +SYM_FUNC_END(handler_trampoline) > diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S > index 57bada6b4e93..aa6181714ec3 100644 > --- a/arch/loongarch/kernel/head.S > +++ b/arch/loongarch/kernel/head.S > @@ -117,7 +117,7 @@ SYM_CODE_START(smpboot_entry) > li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0 > csrwr t0, LOONGARCH_CSR_EUEN > > - la.abs t0, cpuboot_data > + la.pcrel t0, cpuboot_data > ld.d sp, t0, CPU_BOOT_STACK > ld.d tp, t0, CPU_BOOT_TINFO > > diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c > index 7ea62faeeadb..0e8faaca3679 100644 > --- a/arch/loongarch/kernel/traps.c > +++ b/arch/loongarch/kernel/traps.c > @@ -61,6 +61,7 @@ extern asmlinkage void handle_lasx(void); > extern asmlinkage void handle_reserved(void); > extern asmlinkage void handle_watch(void); > extern asmlinkage void handle_vint(void); > +extern asmlinkage void handler_trampoline(void); > > static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, > const char *loglvl, bool user) > @@ -716,7 +717,8 @@ void per_cpu_trap_init(int cpu) > /* 
Install CPU exception handler */ > void set_handler(unsigned long offset, void *addr, unsigned long size) > { > - memcpy((void *)(eentry + offset), addr, size); > + memcpy((void *)(eentry + offset), &handler_trampoline, 24); > + memcpy((void *)(eentry + offset + 24), &addr, 8); > local_flush_icache_range(eentry + offset, eentry + offset + size); > } > > diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S > index 58781c6e4191..cfaacdac518c 100644 > --- a/arch/loongarch/mm/tlbex.S > +++ b/arch/loongarch/mm/tlbex.S > @@ -24,8 +24,7 @@ > move a0, sp > REG_S a2, sp, PT_BVADDR > li.w a1, \write > - la.abs t0, do_page_fault > - jirl ra, t0, 0 > + bl do_page_fault > RESTORE_ALL_AND_RET > SYM_FUNC_END(tlb_do_page_fault_\write) > .endm > @@ -34,20 +33,16 @@ > tlb_do_page_fault 1 > > SYM_FUNC_START(handle_tlb_protect) > - BACKUP_T0T1 > SAVE_ALL > move a0, sp > move a1, zero > csrrd a2, LOONGARCH_CSR_BADV > REG_S a2, sp, PT_BVADDR > - la.abs t0, do_page_fault > - jirl ra, t0, 0 > + bl do_page_fault > RESTORE_ALL_AND_RET > SYM_FUNC_END(handle_tlb_protect) > > SYM_FUNC_START(handle_tlb_load) > - csrwr t0, EXCEPTION_KS0 > - csrwr t1, EXCEPTION_KS1 > csrwr ra, EXCEPTION_KS2 > > /* > @@ -116,7 +111,7 @@ smp_pgtable_change_load: > > #ifdef CONFIG_64BIT > vmalloc_load: > - la.abs t1, swapper_pg_dir > + la.pcrel t1, swapper_pg_dir > b vmalloc_done_load > #endif > > @@ -187,13 +182,10 @@ tlb_huge_update_load: > nopage_tlb_load: > dbar 0 > csrrd ra, EXCEPTION_KS2 > - la.abs t0, tlb_do_page_fault_0 > - jr t0 > + b tlb_do_page_fault_0 > SYM_FUNC_END(handle_tlb_load) > > SYM_FUNC_START(handle_tlb_store) > - csrwr t0, EXCEPTION_KS0 > - csrwr t1, EXCEPTION_KS1 > csrwr ra, EXCEPTION_KS2 > > /* > @@ -263,7 +255,7 @@ smp_pgtable_change_store: > > #ifdef CONFIG_64BIT > vmalloc_store: > - la.abs t1, swapper_pg_dir > + la.pcrel t1, swapper_pg_dir > b vmalloc_done_store > #endif > > @@ -336,13 +328,10 @@ tlb_huge_update_store: > nopage_tlb_store: > dbar 0 > csrrd ra, EXCEPTION_KS2 > 
- la.abs t0, tlb_do_page_fault_1 > - jr t0 > + b tlb_do_page_fault_1 > SYM_FUNC_END(handle_tlb_store) > > SYM_FUNC_START(handle_tlb_modify) > - csrwr t0, EXCEPTION_KS0 > - csrwr t1, EXCEPTION_KS1 > csrwr ra, EXCEPTION_KS2 > > /* > @@ -411,7 +400,7 @@ smp_pgtable_change_modify: > > #ifdef CONFIG_64BIT > vmalloc_modify: > - la.abs t1, swapper_pg_dir > + la.pcrel t1, swapper_pg_dir > b vmalloc_done_modify > #endif > > @@ -483,8 +472,7 @@ tlb_huge_update_modify: > nopage_tlb_modify: > dbar 0 > csrrd ra, EXCEPTION_KS2 > - la.abs t0, tlb_do_page_fault_1 > - jr t0 > + b tlb_do_page_fault_1 > SYM_FUNC_END(handle_tlb_modify) > > SYM_FUNC_START(handle_tlb_refill)
On Sat, Jan 14, 2023 at 2:38 PM Jinyang He <hejinyang@loongson.cn> wrote: > > Hi, Ruoyao and Youling, > > I care about the performance when NUMA enabled. We set CSR.EENTRY > for each possible cpus where is NUMA-relative. So, I guess the more > codes in NUMA-relative memory makes more performance. If we just set > handler_trampoline as exception handler, the performance may be > influenced. So copying both the handlers and handler_trampoline can solve the problem? If that is possible, please do that on top of the latest code in https://github.com/loongson/linux/commits/loongarch-next Huacai > > > Thanks, > > Jinyang > > > On 2023-01-09 17:07, Youling Tang wrote: > > From: Xi Ruoyao <xry111@xry111.site> > > > > Use a trampoline as an exception handlers, which can kill some use of > > la.abs in preparation for the subsequent support of the PIE kernel. > > > > Signed-off-by: Xi Ruoyao <xry111@xry111.site> > > Signed-off-by: Youling Tang <tangyouling@loongson.cn> > > --- > > arch/loongarch/include/asm/stackframe.h | 8 +++---- > > arch/loongarch/include/asm/uaccess.h | 1 - > > arch/loongarch/kernel/entry.S | 6 +++--- > > arch/loongarch/kernel/genex.S | 20 +++++++++--------- > > arch/loongarch/kernel/head.S | 2 +- > > arch/loongarch/kernel/traps.c | 4 +++- > > arch/loongarch/mm/tlbex.S | 28 +++++++------------------ > > 7 files changed, 29 insertions(+), 40 deletions(-) > > > > diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h > > index 4ca953062b5b..96c94035b5d0 100644 > > --- a/arch/loongarch/include/asm/stackframe.h > > +++ b/arch/loongarch/include/asm/stackframe.h > > @@ -76,8 +76,8 @@ > > * kernelsp array for it. It stores the current sp in t0 and loads the > > * new value in sp. 
> > */ > > - .macro get_saved_sp docfi=0 > > - la.abs t1, kernelsp > > + .macro get_saved_sp docfi=0 > > + la.pcrel t1, kernelsp > > #ifdef CONFIG_SMP > > csrrd t0, PERCPU_BASE_KS > > LONG_ADD t1, t1, t0 > > @@ -89,8 +89,8 @@ > > LONG_L sp, t1, 0 > > .endm > > > > - .macro set_saved_sp stackp temp temp2 > > - la.abs \temp, kernelsp > > + .macro set_saved_sp stackp temp temp2 > > + la.pcrel \temp, kernelsp > > #ifdef CONFIG_SMP > > LONG_ADD \temp, \temp, u0 > > #endif > > diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h > > index 255899d4a7c3..0d22991ae430 100644 > > --- a/arch/loongarch/include/asm/uaccess.h > > +++ b/arch/loongarch/include/asm/uaccess.h > > @@ -22,7 +22,6 @@ > > extern u64 __ua_limit; > > > > #define __UA_ADDR ".dword" > > -#define __UA_LA "la.abs" > > #define __UA_LIMIT __ua_limit > > > > /* > > diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S > > index d53b631c9022..ca01afdbec3f 100644 > > --- a/arch/loongarch/kernel/entry.S > > +++ b/arch/loongarch/kernel/entry.S > > @@ -18,9 +18,9 @@ > > .text > > .cfi_sections .debug_frame > > .align 5 > > -SYM_FUNC_START(handle_syscall) > > +SYM_FUNC_START(handle_sys) > > csrrd t0, PERCPU_BASE_KS > > - la.abs t1, kernelsp > > + la.pcrel t1, kernelsp > > add.d t1, t1, t0 > > move t2, sp > > ld.d sp, t1, 0 > > @@ -66,7 +66,7 @@ SYM_FUNC_START(handle_syscall) > > bl do_syscall > > > > RESTORE_ALL_AND_RET > > -SYM_FUNC_END(handle_syscall) > > +SYM_FUNC_END(handle_sys) > > > > SYM_CODE_START(ret_from_fork) > > bl schedule_tail # a0 = struct task_struct *prev > > diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S > > index 75e5be807a0d..d3df0fa725a2 100644 > > --- a/arch/loongarch/kernel/genex.S > > +++ b/arch/loongarch/kernel/genex.S > > @@ -32,9 +32,8 @@ SYM_FUNC_START(__arch_cpu_idle) > > SYM_FUNC_END(__arch_cpu_idle) > > > > SYM_FUNC_START(handle_vint) > > - BACKUP_T0T1 > > SAVE_ALL > > - la.abs t1, __arch_cpu_idle > > + 
la.pcrel t1, __arch_cpu_idle > > LONG_L t0, sp, PT_ERA > > /* 32 byte rollback region */ > > ori t0, t0, 0x1f > > @@ -43,8 +42,7 @@ SYM_FUNC_START(handle_vint) > > LONG_S t0, sp, PT_ERA > > 1: move a0, sp > > move a1, sp > > - la.abs t0, do_vint > > - jirl ra, t0, 0 > > + bl do_vint > > RESTORE_ALL_AND_RET > > SYM_FUNC_END(handle_vint) > > > > @@ -67,12 +65,10 @@ SYM_FUNC_END(except_vec_cex) > > .macro BUILD_HANDLER exception handler prep > > .align 5 > > SYM_FUNC_START(handle_\exception) > > - BACKUP_T0T1 > > SAVE_ALL > > build_prep_\prep > > move a0, sp > > - la.abs t0, do_\handler > > - jirl ra, t0, 0 > > + bl do_\handler > > RESTORE_ALL_AND_RET > > SYM_FUNC_END(handle_\exception) > > .endm > > @@ -89,7 +85,11 @@ SYM_FUNC_END(except_vec_cex) > > BUILD_HANDLER watch watch none > > BUILD_HANDLER reserved reserved none /* others */ > > > > -SYM_FUNC_START(handle_sys) > > - la.abs t0, handle_syscall > > +SYM_FUNC_START(handler_trampoline) > > + csrwr t0, EXCEPTION_KS0 > > + csrwr t1, EXCEPTION_KS1 > > + pcaddi t0, 0 > > + ld.d t0, t0, 16 > > jr t0 > > -SYM_FUNC_END(handle_sys) > > + nop > > +SYM_FUNC_END(handler_trampoline) > > diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S > > index 57bada6b4e93..aa6181714ec3 100644 > > --- a/arch/loongarch/kernel/head.S > > +++ b/arch/loongarch/kernel/head.S > > @@ -117,7 +117,7 @@ SYM_CODE_START(smpboot_entry) > > li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0 > > csrwr t0, LOONGARCH_CSR_EUEN > > > > - la.abs t0, cpuboot_data > > + la.pcrel t0, cpuboot_data > > ld.d sp, t0, CPU_BOOT_STACK > > ld.d tp, t0, CPU_BOOT_TINFO > > > > diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c > > index 7ea62faeeadb..0e8faaca3679 100644 > > --- a/arch/loongarch/kernel/traps.c > > +++ b/arch/loongarch/kernel/traps.c > > @@ -61,6 +61,7 @@ extern asmlinkage void handle_lasx(void); > > extern asmlinkage void handle_reserved(void); > > extern asmlinkage void handle_watch(void); > > extern asmlinkage void 
handle_vint(void); > > +extern asmlinkage void handler_trampoline(void); > > > > static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, > > const char *loglvl, bool user) > > @@ -716,7 +717,8 @@ void per_cpu_trap_init(int cpu) > > /* Install CPU exception handler */ > > void set_handler(unsigned long offset, void *addr, unsigned long size) > > { > > - memcpy((void *)(eentry + offset), addr, size); > > + memcpy((void *)(eentry + offset), &handler_trampoline, 24); > > + memcpy((void *)(eentry + offset + 24), &addr, 8); > > local_flush_icache_range(eentry + offset, eentry + offset + size); > > } > > > > diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S > > index 58781c6e4191..cfaacdac518c 100644 > > --- a/arch/loongarch/mm/tlbex.S > > +++ b/arch/loongarch/mm/tlbex.S > > @@ -24,8 +24,7 @@ > > move a0, sp > > REG_S a2, sp, PT_BVADDR > > li.w a1, \write > > - la.abs t0, do_page_fault > > - jirl ra, t0, 0 > > + bl do_page_fault > > RESTORE_ALL_AND_RET > > SYM_FUNC_END(tlb_do_page_fault_\write) > > .endm > > @@ -34,20 +33,16 @@ > > tlb_do_page_fault 1 > > > > SYM_FUNC_START(handle_tlb_protect) > > - BACKUP_T0T1 > > SAVE_ALL > > move a0, sp > > move a1, zero > > csrrd a2, LOONGARCH_CSR_BADV > > REG_S a2, sp, PT_BVADDR > > - la.abs t0, do_page_fault > > - jirl ra, t0, 0 > > + bl do_page_fault > > RESTORE_ALL_AND_RET > > SYM_FUNC_END(handle_tlb_protect) > > > > SYM_FUNC_START(handle_tlb_load) > > - csrwr t0, EXCEPTION_KS0 > > - csrwr t1, EXCEPTION_KS1 > > csrwr ra, EXCEPTION_KS2 > > > > /* > > @@ -116,7 +111,7 @@ smp_pgtable_change_load: > > > > #ifdef CONFIG_64BIT > > vmalloc_load: > > - la.abs t1, swapper_pg_dir > > + la.pcrel t1, swapper_pg_dir > > b vmalloc_done_load > > #endif > > > > @@ -187,13 +182,10 @@ tlb_huge_update_load: > > nopage_tlb_load: > > dbar 0 > > csrrd ra, EXCEPTION_KS2 > > - la.abs t0, tlb_do_page_fault_0 > > - jr t0 > > + b tlb_do_page_fault_0 > > SYM_FUNC_END(handle_tlb_load) > > > > 
SYM_FUNC_START(handle_tlb_store) > > - csrwr t0, EXCEPTION_KS0 > > - csrwr t1, EXCEPTION_KS1 > > csrwr ra, EXCEPTION_KS2 > > > > /* > > @@ -263,7 +255,7 @@ smp_pgtable_change_store: > > > > #ifdef CONFIG_64BIT > > vmalloc_store: > > - la.abs t1, swapper_pg_dir > > + la.pcrel t1, swapper_pg_dir > > b vmalloc_done_store > > #endif > > > > @@ -336,13 +328,10 @@ tlb_huge_update_store: > > nopage_tlb_store: > > dbar 0 > > csrrd ra, EXCEPTION_KS2 > > - la.abs t0, tlb_do_page_fault_1 > > - jr t0 > > + b tlb_do_page_fault_1 > > SYM_FUNC_END(handle_tlb_store) > > > > SYM_FUNC_START(handle_tlb_modify) > > - csrwr t0, EXCEPTION_KS0 > > - csrwr t1, EXCEPTION_KS1 > > csrwr ra, EXCEPTION_KS2 > > > > /* > > @@ -411,7 +400,7 @@ smp_pgtable_change_modify: > > > > #ifdef CONFIG_64BIT > > vmalloc_modify: > > - la.abs t1, swapper_pg_dir > > + la.pcrel t1, swapper_pg_dir > > b vmalloc_done_modify > > #endif > > > > @@ -483,8 +472,7 @@ tlb_huge_update_modify: > > nopage_tlb_modify: > > dbar 0 > > csrrd ra, EXCEPTION_KS2 > > - la.abs t0, tlb_do_page_fault_1 > > - jr t0 > > + b tlb_do_page_fault_1 > > SYM_FUNC_END(handle_tlb_modify) > > > > SYM_FUNC_START(handle_tlb_refill) >
On Mon, 2023-01-16 at 09:30 +0800, Huacai Chen wrote: > On Sat, Jan 14, 2023 at 2:38 PM Jinyang He <hejinyang@loongson.cn> > wrote: > > > > Hi, Ruoyao and Youling, > > > > I care about the performance when NUMA enabled. We set CSR.EENTRY > > for each possible cpus where is NUMA-relative. So, I guess the more > > codes in NUMA-relative memory makes more performance. If we just set > > handler_trampoline as exception handler, the performance may be > > influenced. > So copying both the handlers and handler_trampoline can solve the > problem? If that is possible, please do that on top of the latest code > in > https://github.com/loongson/linux/commits/loongarch-next Hi folks, I just wrote the trampoline code as a PoC to show "relocatable kernel can work" and there must be some better way. But I'm too sad to write any serious code in this month, and I don't have access to a LoongArch NUMA system. So I think it's better to leave the job for you guys now :). Best regards -- Xi Ruoyao <xry111@xry111.site> School of Aerospace Science and Technology, Xidian University
On Mon, Jan 16, 2023 at 1:41 PM Xi Ruoyao <xry111@xry111.site> wrote: > > On Mon, 2023-01-16 at 09:30 +0800, Huacai Chen wrote: > > On Sat, Jan 14, 2023 at 2:38 PM Jinyang He <hejinyang@loongson.cn> > > wrote: > > > > > > Hi, Ruoyao and Youling, > > > > > > I care about the performance when NUMA enabled. We set CSR.EENTRY > > > for each possible cpus where is NUMA-relative. So, I guess the more > > > codes in NUMA-relative memory makes more performance. If we just set > > > handler_trampoline as exception handler, the performance may be > > > influenced. > > So copying both the handlers and handler_trampoline can solve the > > problem? If that is possible, please do that on top of the latest code > > in > > https://github.com/loongson/linux/commits/loongarch-next > > Hi folks, > > I just wrote the trampoline code as a PoC to show "relocatable kernel > can work" and there must be some better way. But I'm too sad to write > any serious code in this month, and I don't have access to a LoongArch > NUMA system. So I think it's better to leave the job for you guys now > :). Hmm, I hope this series can be merged in 6.3. :) Huacai > > Best regards > > -- > Xi Ruoyao <xry111@xry111.site> > School of Aerospace Science and Technology, Xidian University
© 2016 - 2025 Red Hat, Inc.