[PATCH v6 3/4] LoongArch: entry: Migrate ret_from_fork() to C

Charlie Jenkins posted 4 patches 9 months ago
[PATCH v6 3/4] LoongArch: entry: Migrate ret_from_fork() to C
Posted by Charlie Jenkins 9 months ago
LoongArch is the only architecture that calls
syscall_exit_to_user_mode() from asm. Move the call into C so that this
function can be inlined across all architectures.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
 arch/loongarch/include/asm/asm-prototypes.h |  8 +++++++
 arch/loongarch/kernel/entry.S               | 22 +++++++++----------
 arch/loongarch/kernel/process.c             | 33 +++++++++++++++++++++++------
 3 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
index 51f224bcfc654228ae423e9a066b25b35102a5b9..704066b4f7368be15be960fadbcd6c2574bbf6c0 100644
--- a/arch/loongarch/include/asm/asm-prototypes.h
+++ b/arch/loongarch/include/asm/asm-prototypes.h
@@ -12,3 +12,11 @@ __int128_t __ashlti3(__int128_t a, int b);
 __int128_t __ashrti3(__int128_t a, int b);
 __int128_t __lshrti3(__int128_t a, int b);
 #endif
+
+asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
+							   struct pt_regs *regs);
+
+asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
+								    struct pt_regs *regs,
+								    int (*fn)(void *),
+								    void *fn_arg);
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index 48e7e34e355e83eae8165957ba2eac05a8bf17df..2abc29e573810e000f2fef4646ddca0dbb80eabe 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -77,24 +77,22 @@ SYM_CODE_START(handle_syscall)
 SYM_CODE_END(handle_syscall)
 _ASM_NOKPROBE(handle_syscall)
 
-SYM_CODE_START(ret_from_fork)
+SYM_CODE_START(ret_from_fork_asm)
 	UNWIND_HINT_REGS
-	bl		schedule_tail		# a0 = struct task_struct *prev
-	move		a0, sp
-	bl 		syscall_exit_to_user_mode
+	move		a1, sp
+	bl 		ret_from_fork
 	RESTORE_STATIC
 	RESTORE_SOME
 	RESTORE_SP_AND_RET
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_END(ret_from_fork_asm)
 
-SYM_CODE_START(ret_from_kernel_thread)
+SYM_CODE_START(ret_from_kernel_thread_asm)
 	UNWIND_HINT_REGS
-	bl		schedule_tail		# a0 = struct task_struct *prev
-	move		a0, s1
-	jirl		ra, s0, 0
-	move		a0, sp
-	bl		syscall_exit_to_user_mode
+	move		a1, sp
+	move		a2, s0
+	move		a3, s1
+	bl		ret_from_kernel_thread
 	RESTORE_STATIC
 	RESTORE_SOME
 	RESTORE_SP_AND_RET
-SYM_CODE_END(ret_from_kernel_thread)
+SYM_CODE_END(ret_from_kernel_thread_asm)
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 6e58f65455c7ca3eae2e88ed852c8655a6701e5c..98bc60d7c550fcc0225e8452f81a7d6cd7888015 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/entry-common.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task.h>
@@ -33,6 +34,7 @@
 #include <linux/prctl.h>
 #include <linux/nmi.h>
 
+#include <asm/asm-prototypes.h>
 #include <asm/asm.h>
 #include <asm/bootinfo.h>
 #include <asm/cpu.h>
@@ -47,6 +49,7 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/reg.h>
+#include <asm/switch_to.h>
 #include <asm/unwind.h>
 #include <asm/vdso.h>
 
@@ -63,8 +66,9 @@ EXPORT_SYMBOL(__stack_chk_guard);
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-asmlinkage void ret_from_fork(void);
-asmlinkage void ret_from_kernel_thread(void);
+asmlinkage void restore_and_ret(void);
+asmlinkage void ret_from_fork_asm(void);
+asmlinkage void ret_from_kernel_thread_asm(void);
 
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
@@ -138,6 +142,23 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return 0;
 }
 
+asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
+							   struct pt_regs *regs)
+{
+	schedule_tail(prev);
+	syscall_exit_to_user_mode(regs);
+}
+
+asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
+								    struct pt_regs *regs,
+								    int (*fn)(void *),
+								    void *fn_arg)
+{
+	schedule_tail(prev);
+	fn(fn_arg);
+	syscall_exit_to_user_mode(regs);
+}
+
 /*
  * Copy architecture-specific thread state
  */
@@ -165,8 +186,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		p->thread.reg03 = childksp;
 		p->thread.reg23 = (unsigned long)args->fn;
 		p->thread.reg24 = (unsigned long)args->fn_arg;
-		p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
-		p->thread.sched_ra = (unsigned long)ret_from_kernel_thread;
+		p->thread.reg01 = (unsigned long)ret_from_kernel_thread_asm;
+		p->thread.sched_ra = (unsigned long)ret_from_kernel_thread_asm;
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->csr_euen = p->thread.csr_euen;
 		childregs->csr_crmd = p->thread.csr_crmd;
@@ -182,8 +203,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		childregs->regs[3] = usp;
 
 	p->thread.reg03 = (unsigned long) childregs;
-	p->thread.reg01 = (unsigned long) ret_from_fork;
-	p->thread.sched_ra = (unsigned long) ret_from_fork;
+	p->thread.reg01 = (unsigned long) ret_from_fork_asm;
+	p->thread.sched_ra = (unsigned long) ret_from_fork_asm;
 
 	/*
 	 * New tasks lose permission to use the fpu. This accelerates context

-- 
2.43.0
Re: [PATCH v6 3/4] LoongArch: entry: Migrate ret_from_fork() to C
Posted by Huacai Chen 7 months, 2 weeks ago
Hi, Charlie,

There are some small issues.

On Fri, Mar 21, 2025 at 1:29 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>
> LoongArch is the only architecture that calls
> syscall_exit_to_user_mode() from asm. Move the call into C so that this
> function can be inlined across all architectures.
>
> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> ---
>  arch/loongarch/include/asm/asm-prototypes.h |  8 +++++++
>  arch/loongarch/kernel/entry.S               | 22 +++++++++----------
>  arch/loongarch/kernel/process.c             | 33 +++++++++++++++++++++++------
>  3 files changed, 45 insertions(+), 18 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
> index 51f224bcfc654228ae423e9a066b25b35102a5b9..704066b4f7368be15be960fadbcd6c2574bbf6c0 100644
> --- a/arch/loongarch/include/asm/asm-prototypes.h
> +++ b/arch/loongarch/include/asm/asm-prototypes.h
> @@ -12,3 +12,11 @@ __int128_t __ashlti3(__int128_t a, int b);
>  __int128_t __ashrti3(__int128_t a, int b);
>  __int128_t __lshrti3(__int128_t a, int b);
>  #endif
> +
> +asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
> +                                                          struct pt_regs *regs);
> +
> +asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
> +                                                                   struct pt_regs *regs,
> +                                                                   int (*fn)(void *),
> +                                                                   void *fn_arg);
> diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
> index 48e7e34e355e83eae8165957ba2eac05a8bf17df..2abc29e573810e000f2fef4646ddca0dbb80eabe 100644
> --- a/arch/loongarch/kernel/entry.S
> +++ b/arch/loongarch/kernel/entry.S
> @@ -77,24 +77,22 @@ SYM_CODE_START(handle_syscall)
>  SYM_CODE_END(handle_syscall)
>  _ASM_NOKPROBE(handle_syscall)
>
> -SYM_CODE_START(ret_from_fork)
> +SYM_CODE_START(ret_from_fork_asm)
>         UNWIND_HINT_REGS
> -       bl              schedule_tail           # a0 = struct task_struct *prev
> -       move            a0, sp
> -       bl              syscall_exit_to_user_mode
> +       move            a1, sp
> +       bl              ret_from_fork
>         RESTORE_STATIC
>         RESTORE_SOME
>         RESTORE_SP_AND_RET
> -SYM_CODE_END(ret_from_fork)
> +SYM_CODE_END(ret_from_fork_asm)
>
> -SYM_CODE_START(ret_from_kernel_thread)
> +SYM_CODE_START(ret_from_kernel_thread_asm)
>         UNWIND_HINT_REGS
> -       bl              schedule_tail           # a0 = struct task_struct *prev
> -       move            a0, s1
> -       jirl            ra, s0, 0
> -       move            a0, sp
> -       bl              syscall_exit_to_user_mode
> +       move            a1, sp
> +       move            a2, s0
> +       move            a3, s1
> +       bl              ret_from_kernel_thread
>         RESTORE_STATIC
>         RESTORE_SOME
>         RESTORE_SP_AND_RET
> -SYM_CODE_END(ret_from_kernel_thread)
> +SYM_CODE_END(ret_from_kernel_thread_asm)
> diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
> index 6e58f65455c7ca3eae2e88ed852c8655a6701e5c..98bc60d7c550fcc0225e8452f81a7d6cd7888015 100644
> --- a/arch/loongarch/kernel/process.c
> +++ b/arch/loongarch/kernel/process.c
> @@ -14,6 +14,7 @@
>  #include <linux/init.h>
>  #include <linux/kernel.h>
>  #include <linux/errno.h>
> +#include <linux/entry-common.h>
For alpa-betical order, it should be before errno.h.

>  #include <linux/sched.h>
>  #include <linux/sched/debug.h>
>  #include <linux/sched/task.h>
> @@ -33,6 +34,7 @@
>  #include <linux/prctl.h>
>  #include <linux/nmi.h>
>
> +#include <asm/asm-prototypes.h>
For alpa-betical order, it should be after asm.h.


Huacai

>  #include <asm/asm.h>
>  #include <asm/bootinfo.h>
>  #include <asm/cpu.h>
> @@ -47,6 +49,7 @@
>  #include <asm/pgtable.h>
>  #include <asm/processor.h>
>  #include <asm/reg.h>
> +#include <asm/switch_to.h>
>  #include <asm/unwind.h>
>  #include <asm/vdso.h>
>
> @@ -63,8 +66,9 @@ EXPORT_SYMBOL(__stack_chk_guard);
>  unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
>  EXPORT_SYMBOL(boot_option_idle_override);
>
> -asmlinkage void ret_from_fork(void);
> -asmlinkage void ret_from_kernel_thread(void);
> +asmlinkage void restore_and_ret(void);
> +asmlinkage void ret_from_fork_asm(void);
> +asmlinkage void ret_from_kernel_thread_asm(void);
>
>  void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
>  {
> @@ -138,6 +142,23 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
>         return 0;
>  }
>
> +asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
> +                                                          struct pt_regs *regs)
> +{
> +       schedule_tail(prev);
> +       syscall_exit_to_user_mode(regs);
> +}
> +
> +asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
> +                                                                   struct pt_regs *regs,
> +                                                                   int (*fn)(void *),
> +                                                                   void *fn_arg)
> +{
> +       schedule_tail(prev);
> +       fn(fn_arg);
> +       syscall_exit_to_user_mode(regs);
> +}
> +
>  /*
>   * Copy architecture-specific thread state
>   */
> @@ -165,8 +186,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>                 p->thread.reg03 = childksp;
>                 p->thread.reg23 = (unsigned long)args->fn;
>                 p->thread.reg24 = (unsigned long)args->fn_arg;
> -               p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
> -               p->thread.sched_ra = (unsigned long)ret_from_kernel_thread;
> +               p->thread.reg01 = (unsigned long)ret_from_kernel_thread_asm;
> +               p->thread.sched_ra = (unsigned long)ret_from_kernel_thread_asm;
>                 memset(childregs, 0, sizeof(struct pt_regs));
>                 childregs->csr_euen = p->thread.csr_euen;
>                 childregs->csr_crmd = p->thread.csr_crmd;
> @@ -182,8 +203,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>                 childregs->regs[3] = usp;
>
>         p->thread.reg03 = (unsigned long) childregs;
> -       p->thread.reg01 = (unsigned long) ret_from_fork;
> -       p->thread.sched_ra = (unsigned long) ret_from_fork;
> +       p->thread.reg01 = (unsigned long) ret_from_fork_asm;
> +       p->thread.sched_ra = (unsigned long) ret_from_fork_asm;
>
>         /*
>          * New tasks lose permission to use the fpu. This accelerates context
>
> --
> 2.43.0
>
[tip: core/entry] LoongArch: entry: Migrate ret_from_fork() to C
Posted by tip-bot2 for Charlie Jenkins 7 months, 3 weeks ago
The following commit has been merged into the core/entry branch of tip:

Commit-ID:     7ace1602abf21da505993d77ccbae1df2496b324
Gitweb:        https://git.kernel.org/tip/7ace1602abf21da505993d77ccbae1df2496b324
Author:        Charlie Jenkins <charlie@rivosinc.com>
AuthorDate:    Thu, 20 Mar 2025 10:29:23 -07:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 29 Apr 2025 08:27:10 +02:00

LoongArch: entry: Migrate ret_from_fork() to C

LoongArch is the only architecture that calls syscall_exit_to_user_mode()
from assembly.

Move the call into C so that this function can be inlined across all
architectures.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250320-riscv_optimize_entry-v6-3-63e187e26041@rivosinc.com

---
 arch/loongarch/include/asm/asm-prototypes.h |  8 +++++-
 arch/loongarch/kernel/entry.S               | 22 +++++--------
 arch/loongarch/kernel/process.c             | 33 ++++++++++++++++----
 3 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
index 51f224b..704066b 100644
--- a/arch/loongarch/include/asm/asm-prototypes.h
+++ b/arch/loongarch/include/asm/asm-prototypes.h
@@ -12,3 +12,11 @@ __int128_t __ashlti3(__int128_t a, int b);
 __int128_t __ashrti3(__int128_t a, int b);
 __int128_t __lshrti3(__int128_t a, int b);
 #endif
+
+asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
+							   struct pt_regs *regs);
+
+asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
+								    struct pt_regs *regs,
+								    int (*fn)(void *),
+								    void *fn_arg);
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index 48e7e34..2abc29e 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -77,24 +77,22 @@ SYM_CODE_START(handle_syscall)
 SYM_CODE_END(handle_syscall)
 _ASM_NOKPROBE(handle_syscall)
 
-SYM_CODE_START(ret_from_fork)
+SYM_CODE_START(ret_from_fork_asm)
 	UNWIND_HINT_REGS
-	bl		schedule_tail		# a0 = struct task_struct *prev
-	move		a0, sp
-	bl 		syscall_exit_to_user_mode
+	move		a1, sp
+	bl 		ret_from_fork
 	RESTORE_STATIC
 	RESTORE_SOME
 	RESTORE_SP_AND_RET
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_END(ret_from_fork_asm)
 
-SYM_CODE_START(ret_from_kernel_thread)
+SYM_CODE_START(ret_from_kernel_thread_asm)
 	UNWIND_HINT_REGS
-	bl		schedule_tail		# a0 = struct task_struct *prev
-	move		a0, s1
-	jirl		ra, s0, 0
-	move		a0, sp
-	bl		syscall_exit_to_user_mode
+	move		a1, sp
+	move		a2, s0
+	move		a3, s1
+	bl		ret_from_kernel_thread
 	RESTORE_STATIC
 	RESTORE_SOME
 	RESTORE_SP_AND_RET
-SYM_CODE_END(ret_from_kernel_thread)
+SYM_CODE_END(ret_from_kernel_thread_asm)
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 6e58f65..98bc60d 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/entry-common.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task.h>
@@ -33,6 +34,7 @@
 #include <linux/prctl.h>
 #include <linux/nmi.h>
 
+#include <asm/asm-prototypes.h>
 #include <asm/asm.h>
 #include <asm/bootinfo.h>
 #include <asm/cpu.h>
@@ -47,6 +49,7 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/reg.h>
+#include <asm/switch_to.h>
 #include <asm/unwind.h>
 #include <asm/vdso.h>
 
@@ -63,8 +66,9 @@ EXPORT_SYMBOL(__stack_chk_guard);
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-asmlinkage void ret_from_fork(void);
-asmlinkage void ret_from_kernel_thread(void);
+asmlinkage void restore_and_ret(void);
+asmlinkage void ret_from_fork_asm(void);
+asmlinkage void ret_from_kernel_thread_asm(void);
 
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
@@ -138,6 +142,23 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return 0;
 }
 
+asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
+							   struct pt_regs *regs)
+{
+	schedule_tail(prev);
+	syscall_exit_to_user_mode(regs);
+}
+
+asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
+								    struct pt_regs *regs,
+								    int (*fn)(void *),
+								    void *fn_arg)
+{
+	schedule_tail(prev);
+	fn(fn_arg);
+	syscall_exit_to_user_mode(regs);
+}
+
 /*
  * Copy architecture-specific thread state
  */
@@ -165,8 +186,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		p->thread.reg03 = childksp;
 		p->thread.reg23 = (unsigned long)args->fn;
 		p->thread.reg24 = (unsigned long)args->fn_arg;
-		p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
-		p->thread.sched_ra = (unsigned long)ret_from_kernel_thread;
+		p->thread.reg01 = (unsigned long)ret_from_kernel_thread_asm;
+		p->thread.sched_ra = (unsigned long)ret_from_kernel_thread_asm;
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->csr_euen = p->thread.csr_euen;
 		childregs->csr_crmd = p->thread.csr_crmd;
@@ -182,8 +203,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		childregs->regs[3] = usp;
 
 	p->thread.reg03 = (unsigned long) childregs;
-	p->thread.reg01 = (unsigned long) ret_from_fork;
-	p->thread.sched_ra = (unsigned long) ret_from_fork;
+	p->thread.reg01 = (unsigned long) ret_from_fork_asm;
+	p->thread.sched_ra = (unsigned long) ret_from_fork_asm;
 
 	/*
 	 * New tasks lose permission to use the fpu. This accelerates context