[PATCH v8 11/12] arm64: entry: Convert to generic entry

Jinjie Ruan posted 12 patches 5 days, 16 hours ago
[PATCH v8 11/12] arm64: entry: Convert to generic entry
Posted by Jinjie Ruan 5 days, 16 hours ago
Currently, x86, RISC-V and LoongArch use the generic entry code, which
makes maintainers' work easier and the code more elegant. arm64 has
already switched to the generic IRQ entry, so convert arm64 completely
to the generic entry infrastructure from kernel/entry/*.

The changes are below:
 - Remove the TIF_SYSCALL_TRACE, TIF_SYSCALL_AUDIT and
   TIF_SYSCALL_TRACEPOINT flags, _TIF_SYSCALL_WORK and
   has_syscall_work(), as _TIF_SYSCALL_WORK is equivalent to
   SYSCALL_WORK_ENTER.

 - Implement arch_ptrace_report_syscall_entry/exit() based on the
   existing report_syscall_entry/exit() to do the arm64-specific
   register save/restore during syscall entry/exit.

 - Define ARCH_SYSCALL_WORK_EXIT as "_TIF_SECCOMP | _TIF_SYSCALL_EMU"
   to keep the arm64 behaviour unchanged.

 - Remove arm64 syscall_trace_enter(), syscall_exit_to_user_mode_prepare(),
   and related sub-functions including syscall_exit_work() and
   syscall_enter_audit(), and call the generic entry functions that
   provide similar functionality instead.

 - Implement arch_syscall_is_vdso_sigreturn() to support "Syscall User
   Dispatch".

Suggested-by: Kevin Brodsky <kevin.brodsky@arm.com>
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/Kconfig                    |   2 +-
 arch/arm64/include/asm/entry-common.h |  76 ++++++++++++++
 arch/arm64/include/asm/syscall.h      |  20 +++-
 arch/arm64/include/asm/thread_info.h  |  16 +--
 arch/arm64/kernel/debug-monitors.c    |   7 ++
 arch/arm64/kernel/ptrace.c            | 138 --------------------------
 arch/arm64/kernel/signal.c            |   2 +-
 arch/arm64/kernel/syscall.c           |   6 +-
 8 files changed, 108 insertions(+), 159 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6663ffd23f25..1463ff15d67a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -152,9 +152,9 @@ config ARM64
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_ENTRY
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IOREMAP
-	select GENERIC_IRQ_ENTRY
 	select GENERIC_IRQ_IPI
 	select GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD
 	select GENERIC_IRQ_PROBE
diff --git a/arch/arm64/include/asm/entry-common.h b/arch/arm64/include/asm/entry-common.h
index cab8cd78f693..ab0544b44549 100644
--- a/arch/arm64/include/asm/entry-common.h
+++ b/arch/arm64/include/asm/entry-common.h
@@ -3,14 +3,21 @@
 #ifndef _ASM_ARM64_ENTRY_COMMON_H
 #define _ASM_ARM64_ENTRY_COMMON_H
 
+#include <linux/ptrace.h>
 #include <linux/thread_info.h>
 
+#include <asm/compat.h>
 #include <asm/cpufeature.h>
 #include <asm/daifflags.h>
 #include <asm/fpsimd.h>
 #include <asm/mte.h>
 #include <asm/stacktrace.h>
 
+enum ptrace_syscall_dir {
+	PTRACE_SYSCALL_ENTER = 0,
+	PTRACE_SYSCALL_EXIT,
+};
+
 #define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_MTE_ASYNC_FAULT | _TIF_FOREIGN_FPSTATE)
 
 static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
@@ -54,4 +61,73 @@ static inline bool arch_irqentry_exit_need_resched(void)
 
 #define arch_irqentry_exit_need_resched arch_irqentry_exit_need_resched
 
+static inline unsigned long ptrace_save_reg(struct pt_regs *regs,
+					    enum ptrace_syscall_dir dir,
+					    int *regno)
+{
+	unsigned long saved_reg;
+
+	/*
+	 * We have some ABI weirdness here in the way that we handle syscall
+	 * exit stops because we indicate whether or not the stop has been
+	 * signalled from syscall entry or syscall exit by clobbering a general
+	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
+	 * and restoring its old value after the stop. This means that:
+	 *
+	 * - Any writes by the tracer to this register during the stop are
+	 *   ignored/discarded.
+	 *
+	 * - The actual value of the register is not available during the stop,
+	 *   so the tracer cannot save it and restore it later.
+	 *
+	 * - Syscall stops behave differently to seccomp and pseudo-step traps
+	 *   (the latter do not nobble any registers).
+	 */
+	*regno = (is_compat_task() ? 12 : 7);
+	saved_reg = regs->regs[*regno];
+	regs->regs[*regno] = dir;
+
+	return saved_reg;
+}
+
+static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs)
+{
+	unsigned long saved_reg;
+	int regno, ret;
+
+	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_ENTER, &regno);
+	ret = ptrace_report_syscall_entry(regs);
+	if (ret)
+		forget_syscall(regs);
+	regs->regs[regno] = saved_reg;
+
+	return ret;
+}
+
+#define arch_ptrace_report_syscall_entry arch_ptrace_report_syscall_entry
+
+static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
+							    int step)
+{
+	unsigned long saved_reg;
+	int regno;
+
+	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_EXIT, &regno);
+	if (!step) {
+		ptrace_report_syscall_exit(regs, 0);
+		regs->regs[regno] = saved_reg;
+	} else {
+		regs->regs[regno] = saved_reg;
+
+		/*
+		 * Signal a pseudo-step exception since we are stepping but
+		 * tracer modifications to the registers may have rewound the
+		 * state machine.
+		 */
+		ptrace_report_syscall_exit(regs, 1);
+	}
+}
+
+#define arch_ptrace_report_syscall_exit arch_ptrace_report_syscall_exit
+
 #endif /* _ASM_ARM64_ENTRY_COMMON_H */
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 6225981fbbdb..f705ba2bb6fd 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -9,6 +9,9 @@
 #include <linux/compat.h>
 #include <linux/err.h>
 
+#include <asm/compat.h>
+#include <asm/vdso.h>
+
 typedef long (*syscall_fn_t)(const struct pt_regs *regs);
 
 extern const syscall_fn_t sys_call_table[];
@@ -114,12 +117,21 @@ static inline int syscall_get_arch(struct task_struct *task)
 	return AUDIT_ARCH_AARCH64;
 }
 
-static inline bool has_syscall_work(unsigned long flags)
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
 {
-	return unlikely(flags & _TIF_SYSCALL_WORK);
+	unsigned long sigtramp;
+
+#ifdef CONFIG_COMPAT
+	if (is_compat_task()) {
+		unsigned long vdso = (unsigned long)current->mm->context.sigpage;
+
+		return (regs->pc >= vdso && regs->pc < (vdso + PAGE_SIZE));
+	}
+#endif
+	sigtramp = (unsigned long)VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
+	return regs->pc == (sigtramp + 8);
 }
 
-int syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long flags);
-void syscall_exit_to_user_mode_prepare(struct pt_regs *regs);
+#define ARCH_SYSCALL_WORK_EXIT	(_TIF_SECCOMP | _TIF_SYSCALL_EMU)
 
 #endif	/* __ASM_SYSCALL_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index ff4998fa1844..d3142b5d1b9c 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -43,6 +43,7 @@ struct thread_info {
 	void			*scs_sp;
 #endif
 	u32			cpu;
+	unsigned long		syscall_work;   /* SYSCALL_WORK_ flags */
 };
 
 #define thread_saved_pc(tsk)	\
@@ -65,11 +66,8 @@ void arch_setup_new_exec(void);
 #define TIF_UPROBE		5	/* uprobe breakpoint or singlestep */
 #define TIF_MTE_ASYNC_FAULT	6	/* MTE Asynchronous Tag Check Fault */
 #define TIF_NOTIFY_SIGNAL	7	/* signal notifications exist */
-#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
-#define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
-#define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
-#define TIF_SECCOMP		11	/* syscall secure computing */
-#define TIF_SYSCALL_EMU		12	/* syscall emulation active */
+#define TIF_SECCOMP		11      /* syscall secure computing */
+#define TIF_SYSCALL_EMU		12      /* syscall emulation active */
 #define TIF_PATCH_PENDING	13	/* pending live patching update */
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_FREEZE		19
@@ -92,24 +90,16 @@ void arch_setup_new_exec(void);
 #define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
-#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
-#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
-#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
-#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_SVE		(1 << TIF_SVE)
 #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_TSC_SIGSEGV	(1 << TIF_TSC_SIGSEGV)
 
-#define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
-				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
-				 _TIF_SYSCALL_EMU)
-
 #ifdef CONFIG_SHADOW_CALL_STACK
 #define INIT_SCS							\
 	.scs_base	= init_shadow_call_stack,			\
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 29307642f4c9..e67643a70405 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -385,11 +385,18 @@ void user_enable_single_step(struct task_struct *task)
 
 	if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP))
 		set_regs_spsr_ss(task_pt_regs(task));
+
+	/*
+	 * Ensure that a trap is triggered once stepping out of a system
+	 * call prior to executing any user instruction.
+	 */
+	set_task_syscall_work(task, SYSCALL_EXIT_TRAP);
 }
 NOKPROBE_SYMBOL(user_enable_single_step);
 
 void user_disable_single_step(struct task_struct *task)
 {
 	clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
+	clear_task_syscall_work(task, SYSCALL_EXIT_TRAP);
 }
 NOKPROBE_SYMBOL(user_disable_single_step);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index c2bd0130212d..9e3b39e207d1 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -42,9 +42,6 @@
 #include <asm/traps.h>
 #include <asm/system_misc.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
 struct pt_regs_offset {
 	const char *name;
 	int offset;
@@ -2312,141 +2309,6 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ptrace_request(child, request, addr, data);
 }
 
-enum ptrace_syscall_dir {
-	PTRACE_SYSCALL_ENTER = 0,
-	PTRACE_SYSCALL_EXIT,
-};
-
-static inline unsigned long ptrace_save_reg(struct pt_regs *regs,
-					    enum ptrace_syscall_dir dir,
-					    int *regno)
-{
-	unsigned long saved_reg;
-
-	/*
-	 * We have some ABI weirdness here in the way that we handle syscall
-	 * exit stops because we indicate whether or not the stop has been
-	 * signalled from syscall entry or syscall exit by clobbering a general
-	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
-	 * and restoring its old value after the stop. This means that:
-	 *
-	 * - Any writes by the tracer to this register during the stop are
-	 *   ignored/discarded.
-	 *
-	 * - The actual value of the register is not available during the stop,
-	 *   so the tracer cannot save it and restore it later.
-	 *
-	 * - Syscall stops behave differently to seccomp and pseudo-step traps
-	 *   (the latter do not nobble any registers).
-	 */
-	*regno = (is_compat_task() ? 12 : 7);
-	saved_reg = regs->regs[*regno];
-	regs->regs[*regno] = dir;
-
-	return saved_reg;
-}
-
-static int report_syscall_entry(struct pt_regs *regs)
-{
-	unsigned long saved_reg;
-	int regno, ret;
-
-	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_ENTER, &regno);
-	ret = ptrace_report_syscall_entry(regs);
-	if (ret)
-		forget_syscall(regs);
-	regs->regs[regno] = saved_reg;
-
-	return ret;
-}
-
-static void report_syscall_exit(struct pt_regs *regs)
-{
-	unsigned long saved_reg;
-	int regno;
-
-	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_EXIT, &regno);
-	if (!test_thread_flag(TIF_SINGLESTEP)) {
-		ptrace_report_syscall_exit(regs, 0);
-		regs->regs[regno] = saved_reg;
-	} else {
-		regs->regs[regno] = saved_reg;
-
-		/*
-		 * Signal a pseudo-step exception since we are stepping but
-		 * tracer modifications to the registers may have rewound the
-		 * state machine.
-		 */
-		ptrace_report_syscall_exit(regs, 1);
-	}
-}
-
-static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
-{
-	if (unlikely(audit_context())) {
-		unsigned long args[6];
-
-		syscall_get_arguments(current, regs, args);
-		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
-	}
-}
-
-int syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long flags)
-{
-	int ret;
-
-	if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
-		ret = report_syscall_entry(regs);
-		if (ret || (flags & _TIF_SYSCALL_EMU))
-			return NO_SYSCALL;
-	}
-
-	/* Do the secure computing after ptrace; failures should be fast. */
-	if (flags & _TIF_SECCOMP) {
-		ret = __secure_computing();
-		if (ret == -1)
-			return NO_SYSCALL;
-	}
-
-	/* Either of the above might have changed the syscall number */
-	syscall = syscall_get_nr(current, regs);
-
-	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) {
-		trace_sys_enter(regs, syscall);
-
-		/*
-		 * Probes or BPF hooks in the tracepoint may have changed the
-		 * system call number as well.
-		 */
-		 syscall = syscall_get_nr(current, regs);
-	}
-
-	syscall_enter_audit(regs, syscall);
-
-	return ret ? : syscall;
-}
-
-static void syscall_exit_work(struct pt_regs *regs, unsigned long flags)
-{
-	audit_syscall_exit(regs);
-
-	if (flags & _TIF_SYSCALL_TRACEPOINT)
-		trace_sys_exit(regs, syscall_get_return_value(current, regs));
-
-	if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
-		report_syscall_exit(regs);
-}
-
-void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
-{
-	unsigned long flags = read_thread_flags();
-
-	rseq_syscall(regs);
-
-	if (has_syscall_work(flags) || flags & _TIF_SINGLESTEP)
-		syscall_exit_work(regs, flags);
-}
-
 /*
  * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a.
  * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 1110eeb21f57..d3ec1892b3c7 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -8,8 +8,8 @@
 
 #include <linux/cache.h>
 #include <linux/compat.h>
+#include <linux/entry-common.h>
 #include <linux/errno.h>
-#include <linux/irq-entry-common.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
 #include <linux/freezer.h>
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 65021d0f49e1..9848772c63fd 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -2,6 +2,7 @@
 
 #include <linux/compiler.h>
 #include <linux/context_tracking.h>
+#include <linux/entry-common.h>
 #include <linux/errno.h>
 #include <linux/nospec.h>
 #include <linux/ptrace.h>
@@ -68,6 +69,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
 static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 			   const syscall_fn_t syscall_table[])
 {
+	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
 	unsigned long flags = read_thread_flags();
 
 	regs->orig_x0 = regs->regs[0];
@@ -101,7 +103,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 		return;
 	}
 
-	if (has_syscall_work(flags)) {
+	if (work & SYSCALL_WORK_ENTER) {
 		/*
 		 * The de-facto standard way to skip a system call using ptrace
 		 * is to set the system call to -1 (NO_SYSCALL) and set x0 to a
@@ -119,7 +121,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 		 */
 		if (scno == NO_SYSCALL)
 			syscall_set_return_value(current, regs, -ENOSYS, 0);
-		scno = syscall_trace_enter(regs, scno, flags);
+		scno = syscall_trace_enter(regs, scno, work);
 		if (scno == NO_SYSCALL)
 			goto trace_exit;
 	}
-- 
2.34.1
Re: [PATCH v8 11/12] arm64: entry: Convert to generic entry
Posted by Kevin Brodsky 4 days, 10 hours ago
On 26/11/2025 08:14, Jinjie Ruan wrote:
> Currently, x86, Riscv, Loongarch use the generic entry which makes
> maintainers' work easier and codes more elegant. arm64 has already
> switched to the generic IRQ entry, so completely convert arm64 to use
> the generic entry infrastructure from kernel/entry/*.
>
> The changes are below:
>  - Remove TIF_SYSCALL_* flag, _TIF_WORK_MASK, _TIF_SYSCALL_WORK,

_TIF_WORK_MASK is now removed in patch 1.

>    and remove has_syscall_work(), as _TIF_SYSCALL_WORK is equal with
>    SYSCALL_WORK_ENTER.
>
> [...]
>
> +static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
> +							    int step)
> +{
> +	unsigned long saved_reg;
> +	int regno;
> +
> +	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_EXIT, &regno);
> +	if (!step) {

A difference I noticed here is that the generic report_single_step()
always returns false if SYSCALL_EMU is set. I don't know if the
combination of SYSCALL_EMU and SINGLESTEP is meaningful, but if it is
then I think that's a behaviour change.

> +		ptrace_report_syscall_exit(regs, 0);
> +		regs->regs[regno] = saved_reg;
> +	} else {
> +		regs->regs[regno] = saved_reg;
> +
> +		/*
> +		 * Signal a pseudo-step exception since we are stepping but
> +		 * tracer modifications to the registers may have rewound the
> +		 * state machine.
> +		 */
> +		ptrace_report_syscall_exit(regs, 1);
> +	}
> +}
> +
> +#define arch_ptrace_report_syscall_exit arch_ptrace_report_syscall_exit
> +
>  #endif /* _ASM_ARM64_ENTRY_COMMON_H */
> diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
> index 6225981fbbdb..f705ba2bb6fd 100644
> --- a/arch/arm64/include/asm/syscall.h
> +++ b/arch/arm64/include/asm/syscall.h
> @@ -9,6 +9,9 @@
>  #include <linux/compat.h>
>  #include <linux/err.h>
>  
> +#include <asm/compat.h>
> +#include <asm/vdso.h>
> +
>  typedef long (*syscall_fn_t)(const struct pt_regs *regs);
>  
>  extern const syscall_fn_t sys_call_table[];
> @@ -114,12 +117,21 @@ static inline int syscall_get_arch(struct task_struct *task)
>  	return AUDIT_ARCH_AARCH64;
>  }
>  
> -static inline bool has_syscall_work(unsigned long flags)
> +static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
>  {
> -	return unlikely(flags & _TIF_SYSCALL_WORK);
> +	unsigned long sigtramp;
> +
> +#ifdef CONFIG_COMPAT
> +	if (is_compat_task()) {
> +		unsigned long vdso = (unsigned long)current->mm->context.sigpage;

Might as well call it sigpage (separate from the vDSO on arm32).

> +
> +		return (regs->pc >= vdso && regs->pc < (vdso + PAGE_SIZE));

Nit: no need for parentheses around the expression to return.

> +	}
> +#endif
> +	sigtramp = (unsigned long)VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
> +	return regs->pc == (sigtramp + 8);
>  }
>  
> -int syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long flags);
> -void syscall_exit_to_user_mode_prepare(struct pt_regs *regs);
> +#define ARCH_SYSCALL_WORK_EXIT	(_TIF_SECCOMP | _TIF_SYSCALL_EMU)
>  
>  #endif	/* __ASM_SYSCALL_H */
> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
> index ff4998fa1844..d3142b5d1b9c 100644
> --- a/arch/arm64/include/asm/thread_info.h
> +++ b/arch/arm64/include/asm/thread_info.h
> @@ -43,6 +43,7 @@ struct thread_info {
>  	void			*scs_sp;
>  #endif
>  	u32			cpu;
> +	unsigned long		syscall_work;   /* SYSCALL_WORK_ flags */
>  };
>  
>  #define thread_saved_pc(tsk)	\
> @@ -65,11 +66,8 @@ void arch_setup_new_exec(void);
>  #define TIF_UPROBE		5	/* uprobe breakpoint or singlestep */
>  #define TIF_MTE_ASYNC_FAULT	6	/* MTE Asynchronous Tag Check Fault */
>  #define TIF_NOTIFY_SIGNAL	7	/* signal notifications exist */
> -#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
> -#define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
> -#define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
> -#define TIF_SECCOMP		11	/* syscall secure computing */
> -#define TIF_SYSCALL_EMU		12	/* syscall emulation active */
> +#define TIF_SECCOMP		11      /* syscall secure computing */
> +#define TIF_SYSCALL_EMU		12      /* syscall emulation active */

These seem to have reappeared in v8 for some reason?

>  #define TIF_PATCH_PENDING	13	/* pending live patching update */
>  #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
>  #define TIF_FREEZE		19
> @@ -92,24 +90,16 @@ void arch_setup_new_exec(void);
>  #define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
>  #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
>  #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
> -#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
> -#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
> -#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
>  #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
>  #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)

Ditto.

>  #define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
>  #define _TIF_UPROBE		(1 << TIF_UPROBE)
> -#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
>  #define _TIF_32BIT		(1 << TIF_32BIT)
>  #define _TIF_SVE		(1 << TIF_SVE)
>  #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
>  #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
>  #define _TIF_TSC_SIGSEGV	(1 << TIF_TSC_SIGSEGV)
>  
> -#define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
> -				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
> -				 _TIF_SYSCALL_EMU)
> -
>  #ifdef CONFIG_SHADOW_CALL_STACK
>  #define INIT_SCS							\
>  	.scs_base	= init_shadow_call_stack,			\
> diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
> index 29307642f4c9..e67643a70405 100644
> --- a/arch/arm64/kernel/debug-monitors.c
> +++ b/arch/arm64/kernel/debug-monitors.c
> @@ -385,11 +385,18 @@ void user_enable_single_step(struct task_struct *task)
>  
>  	if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP))
>  		set_regs_spsr_ss(task_pt_regs(task));
> +
> +	/*
> +	 * Ensure that a trap is triggered once stepping out of a system
> +	 * call prior to executing any user instruction.
> +	 */
> +	set_task_syscall_work(task, SYSCALL_EXIT_TRAP);
>  }
>  NOKPROBE_SYMBOL(user_enable_single_step);
>  
>  void user_disable_single_step(struct task_struct *task)
>  {
>  	clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
> +	clear_task_syscall_work(task, SYSCALL_EXIT_TRAP);
>  }
>  NOKPROBE_SYMBOL(user_disable_single_step);
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index c2bd0130212d..9e3b39e207d1 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -42,9 +42,6 @@
>  #include <asm/traps.h>
>  #include <asm/system_misc.h>
>  
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/syscalls.h>
> -
>  struct pt_regs_offset {
>  	const char *name;
>  	int offset;
> @@ -2312,141 +2309,6 @@ long arch_ptrace(struct task_struct *child, long request,
>  	return ptrace_request(child, request, addr, data);
>  }
>  
> -enum ptrace_syscall_dir {
> -	PTRACE_SYSCALL_ENTER = 0,
> -	PTRACE_SYSCALL_EXIT,
> -};
> -
> -static inline unsigned long ptrace_save_reg(struct pt_regs *regs,
> -					    enum ptrace_syscall_dir dir,
> -					    int *regno)
> -{
> -	unsigned long saved_reg;
> -
> -	/*
> -	 * We have some ABI weirdness here in the way that we handle syscall
> -	 * exit stops because we indicate whether or not the stop has been
> -	 * signalled from syscall entry or syscall exit by clobbering a general
> -	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
> -	 * and restoring its old value after the stop. This means that:
> -	 *
> -	 * - Any writes by the tracer to this register during the stop are
> -	 *   ignored/discarded.
> -	 *
> -	 * - The actual value of the register is not available during the stop,
> -	 *   so the tracer cannot save it and restore it later.
> -	 *
> -	 * - Syscall stops behave differently to seccomp and pseudo-step traps
> -	 *   (the latter do not nobble any registers).
> -	 */
> -	*regno = (is_compat_task() ? 12 : 7);
> -	saved_reg = regs->regs[*regno];
> -	regs->regs[*regno] = dir;
> -
> -	return saved_reg;
> -}
> -
> -static int report_syscall_entry(struct pt_regs *regs)
> -{
> -	unsigned long saved_reg;
> -	int regno, ret;
> -
> -	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_ENTER, &regno);
> -	ret = ptrace_report_syscall_entry(regs);
> -	if (ret)
> -		forget_syscall(regs);
> -	regs->regs[regno] = saved_reg;
> -
> -	return ret;
> -}
> -
> -static void report_syscall_exit(struct pt_regs *regs)
> -{
> -	unsigned long saved_reg;
> -	int regno;
> -
> -	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_EXIT, &regno);
> -	if (!test_thread_flag(TIF_SINGLESTEP)) {
> -		ptrace_report_syscall_exit(regs, 0);
> -		regs->regs[regno] = saved_reg;
> -	} else {
> -		regs->regs[regno] = saved_reg;
> -
> -		/*
> -		 * Signal a pseudo-step exception since we are stepping but
> -		 * tracer modifications to the registers may have rewound the
> -		 * state machine.
> -		 */
> -		ptrace_report_syscall_exit(regs, 1);
> -	}
> -}
> -
> -static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
> -{
> -	if (unlikely(audit_context())) {
> -		unsigned long args[6];
> -
> -		syscall_get_arguments(current, regs, args);
> -		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
> -	}
> -}
> -
> -int syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long flags)
> -{
> -	int ret;
> -
> -	if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
> -		ret = report_syscall_entry(regs);
> -		if (ret || (flags & _TIF_SYSCALL_EMU))
> -			return NO_SYSCALL;
> -	}
> -
> -	/* Do the secure computing after ptrace; failures should be fast. */
> -	if (flags & _TIF_SECCOMP) {
> -		ret = __secure_computing();
> -		if (ret == -1)
> -			return NO_SYSCALL;
> -	}
> -
> -	/* Either of the above might have changed the syscall number */
> -	syscall = syscall_get_nr(current, regs);
> -
> -	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) {
> -		trace_sys_enter(regs, syscall);
> -
> -		/*
> -		 * Probes or BPF hooks in the tracepoint may have changed the
> -		 * system call number as well.
> -		 */
> -		 syscall = syscall_get_nr(current, regs);
> -	}
> -
> -	syscall_enter_audit(regs, syscall);
> -
> -	return ret ? : syscall;
> -}
> -
> -static void syscall_exit_work(struct pt_regs *regs, unsigned long flags)
> -{
> -	audit_syscall_exit(regs);
> -
> -	if (flags & _TIF_SYSCALL_TRACEPOINT)
> -		trace_sys_exit(regs, syscall_get_return_value(current, regs));
> -
> -	if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
> -		report_syscall_exit(regs);
> -}
> -
> -void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
> -{
> -	unsigned long flags = read_thread_flags();
> -
> -	rseq_syscall(regs);
> -
> -	if (has_syscall_work(flags) || flags & _TIF_SINGLESTEP)
> -		syscall_exit_work(regs, flags);
> -}

Aside from the small change in arch_ptrace_report_syscall_exit(), these
look exactly equivalent to the generic functions, so LGTM.

- Kevin
Re: [PATCH v8 11/12] arm64: entry: Convert to generic entry
Posted by Jinjie Ruan 3 days, 19 hours ago

On 2025/11/27 21:31, Kevin Brodsky wrote:
> On 26/11/2025 08:14, Jinjie Ruan wrote:
>> Currently, x86, Riscv, Loongarch use the generic entry which makes
>> maintainers' work easier and codes more elegant. arm64 has already
>> switched to the generic IRQ entry, so completely convert arm64 to use
>> the generic entry infrastructure from kernel/entry/*.
>>
>> The changes are below:
>>  - Remove TIF_SYSCALL_* flag, _TIF_WORK_MASK, _TIF_SYSCALL_WORK,
> 
> _TIF_WORK_MASK is now removed in patch 1.
> 
>>    and remove has_syscall_work(), as _TIF_SYSCALL_WORK is equal with
>>    SYSCALL_WORK_ENTER.
>>
>> [...]
>>
>> +static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
>> +							    int step)
>> +{
>> +	unsigned long saved_reg;
>> +	int regno;
>> +
>> +	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_EXIT, &regno);
>> +	if (!step) {
> 
> A difference I noticed here is that the generic report_single_step()
> always returns false if SYSCALL_EMU is set. I don't know if the
> combination of SYSCALL_EMU and SINGLESTEP is meaningful, but if it is
> then I think that's a behaviour change.

Commit 64eb35f701f0 ("ptrace: Migrate TIF_SYSCALL_EMU to use
SYSCALL_WORK flag") changed the code quoted below.

The original logic of report_single_step() already returned false in
the following cases:

- Only _TIF_SYSCALL_EMU is set.

- Both _TIF_SINGLESTEP and _TIF_SYSCALL_EMU are set.

- Neither _TIF_SINGLESTEP nor _TIF_SYSCALL_EMU is set.


    #define SYSEMU_STEP    (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)

    static inline bool report_single_step(unsigned long ti_work)
    {
       return (ti_work & SYSEMU_STEP) == _TIF_SINGLESTEP;
    }

I think the "returns false if SYSCALL_EMU is set" behaviour is correct
according to the ptrace(2) man page: both PTRACE_SYSEMU and
PTRACE_SYSEMU_SINGLESTEP need to report the syscall only once, on
syscall entry.

    "For PTRACE_SYSEMU, continue and stop on entry to the next
      system call, which will not be executed.  See the
      documentation on syscall-stops below.  For
      PTRACE_SYSEMU_SINGLESTEP, do the same but also singlestep
      if not a system call."

Link: https://man7.org/linux/man-pages/man2/ptrace.2.html

> 
>> +		ptrace_report_syscall_exit(regs, 0);
>> +		regs->regs[regno] = saved_reg;
>> +	} else {
>> +		regs->regs[regno] = saved_reg;
>> +
>> +		/*
>> +		 * Signal a pseudo-step exception since we are stepping but
>> +		 * tracer modifications to the registers may have rewound the
>> +		 * state machine.
>> +		 */
>> +		ptrace_report_syscall_exit(regs, 1);
>> +	}
>> +}
>> +
>> +#define arch_ptrace_report_syscall_exit arch_ptrace_report_syscall_exit
>> +
>>  #endif /* _ASM_ARM64_ENTRY_COMMON_H */
>> diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
>> index 6225981fbbdb..f705ba2bb6fd 100644
>> --- a/arch/arm64/include/asm/syscall.h
>> +++ b/arch/arm64/include/asm/syscall.h
>> @@ -9,6 +9,9 @@
>>  #include <linux/compat.h>
>>  #include <linux/err.h>
>>  
>> +#include <asm/compat.h>
>> +#include <asm/vdso.h>
>> +
>>  typedef long (*syscall_fn_t)(const struct pt_regs *regs);
>>  
>>  extern const syscall_fn_t sys_call_table[];
>> @@ -114,12 +117,21 @@ static inline int syscall_get_arch(struct task_struct *task)
>>  	return AUDIT_ARCH_AARCH64;
>>  }
>>  
>> -static inline bool has_syscall_work(unsigned long flags)
>> +static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
>>  {
>> -	return unlikely(flags & _TIF_SYSCALL_WORK);
>> +	unsigned long sigtramp;
>> +
>> +#ifdef CONFIG_COMPAT
>> +	if (is_compat_task()) {
>> +		unsigned long vdso = (unsigned long)current->mm->context.sigpage;
> 
> Might as well call it sigpage (separate from the vDSO on arm32).
> 
>> +
>> +		return (regs->pc >= vdso && regs->pc < (vdso + PAGE_SIZE));
> 
> Nit: no need for parentheses around the expression to return.
> 
>> +	}
>> +#endif
>> +	sigtramp = (unsigned long)VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
>> +	return regs->pc == (sigtramp + 8);
>>  }
>>  
>> -int syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long flags);
>> -void syscall_exit_to_user_mode_prepare(struct pt_regs *regs);
>> +#define ARCH_SYSCALL_WORK_EXIT	(_TIF_SECCOMP | _TIF_SYSCALL_EMU)
>>  
>>  #endif	/* __ASM_SYSCALL_H */
>> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
>> index ff4998fa1844..d3142b5d1b9c 100644
>> --- a/arch/arm64/include/asm/thread_info.h
>> +++ b/arch/arm64/include/asm/thread_info.h
>> @@ -43,6 +43,7 @@ struct thread_info {
>>  	void			*scs_sp;
>>  #endif
>>  	u32			cpu;
>> +	unsigned long		syscall_work;   /* SYSCALL_WORK_ flags */
>>  };
>>  
>>  #define thread_saved_pc(tsk)	\
>> @@ -65,11 +66,8 @@ void arch_setup_new_exec(void);
>>  #define TIF_UPROBE		5	/* uprobe breakpoint or singlestep */
>>  #define TIF_MTE_ASYNC_FAULT	6	/* MTE Asynchronous Tag Check Fault */
>>  #define TIF_NOTIFY_SIGNAL	7	/* signal notifications exist */
>> -#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
>> -#define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
>> -#define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
>> -#define TIF_SECCOMP		11	/* syscall secure computing */
>> -#define TIF_SYSCALL_EMU		12	/* syscall emulation active */
>> +#define TIF_SECCOMP		11      /* syscall secure computing */
>> +#define TIF_SYSCALL_EMU		12      /* syscall emulation active */
> 
> These seem to have reappeared in v8 for some reason?

v8 adds "ARCH_SYSCALL_WORK_EXIT", defined as "_TIF_SECCOMP | _TIF_SYSCALL_EMU",
to keep the arm64 behaviour unchanged, as mentioned in v7.

> 
>>  #define TIF_PATCH_PENDING	13	/* pending live patching update */
>>  #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
>>  #define TIF_FREEZE		19
>> @@ -92,24 +90,16 @@ void arch_setup_new_exec(void);
>>  #define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
>>  #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
>>  #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
>> -#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
>> -#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
>> -#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
>>  #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
>>  #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
> 
> Ditto.
> 
>>  #define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
>>  #define _TIF_UPROBE		(1 << TIF_UPROBE)
>> -#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
>>  #define _TIF_32BIT		(1 << TIF_32BIT)
>>  #define _TIF_SVE		(1 << TIF_SVE)
>>  #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
>>  #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
>>  #define _TIF_TSC_SIGSEGV	(1 << TIF_TSC_SIGSEGV)
>>  
>> -#define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
>> -				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
>> -				 _TIF_SYSCALL_EMU)
>> -
>>  #ifdef CONFIG_SHADOW_CALL_STACK
>>  #define INIT_SCS							\
>>  	.scs_base	= init_shadow_call_stack,			\
>> diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
>> index 29307642f4c9..e67643a70405 100644
>> --- a/arch/arm64/kernel/debug-monitors.c
>> +++ b/arch/arm64/kernel/debug-monitors.c
>> @@ -385,11 +385,18 @@ void user_enable_single_step(struct task_struct *task)
>>  
>>  	if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP))
>>  		set_regs_spsr_ss(task_pt_regs(task));
>> +
>> +	/*
>> +	 * Ensure that a trap is triggered once stepping out of a system
>> +	 * call prior to executing any user instruction.
>> +	 */
>> +	set_task_syscall_work(task, SYSCALL_EXIT_TRAP);
>>  }
>>  NOKPROBE_SYMBOL(user_enable_single_step);
>>  
>>  void user_disable_single_step(struct task_struct *task)
>>  {
>>  	clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
>> +	clear_task_syscall_work(task, SYSCALL_EXIT_TRAP);
>>  }
>>  NOKPROBE_SYMBOL(user_disable_single_step);
>> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
>> index c2bd0130212d..9e3b39e207d1 100644
>> --- a/arch/arm64/kernel/ptrace.c
>> +++ b/arch/arm64/kernel/ptrace.c
>> @@ -42,9 +42,6 @@
>>  #include <asm/traps.h>
>>  #include <asm/system_misc.h>
>>  
>> -#define CREATE_TRACE_POINTS
>> -#include <trace/events/syscalls.h>
>> -
>>  struct pt_regs_offset {
>>  	const char *name;
>>  	int offset;
>> @@ -2312,141 +2309,6 @@ long arch_ptrace(struct task_struct *child, long request,
>>  	return ptrace_request(child, request, addr, data);
>>  }
>>  
>> -enum ptrace_syscall_dir {
>> -	PTRACE_SYSCALL_ENTER = 0,
>> -	PTRACE_SYSCALL_EXIT,
>> -};
>> -
>> -static inline unsigned long ptrace_save_reg(struct pt_regs *regs,
>> -					    enum ptrace_syscall_dir dir,
>> -					    int *regno)
>> -{
>> -	unsigned long saved_reg;
>> -
>> -	/*
>> -	 * We have some ABI weirdness here in the way that we handle syscall
>> -	 * exit stops because we indicate whether or not the stop has been
>> -	 * signalled from syscall entry or syscall exit by clobbering a general
>> -	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
>> -	 * and restoring its old value after the stop. This means that:
>> -	 *
>> -	 * - Any writes by the tracer to this register during the stop are
>> -	 *   ignored/discarded.
>> -	 *
>> -	 * - The actual value of the register is not available during the stop,
>> -	 *   so the tracer cannot save it and restore it later.
>> -	 *
>> -	 * - Syscall stops behave differently to seccomp and pseudo-step traps
>> -	 *   (the latter do not nobble any registers).
>> -	 */
>> -	*regno = (is_compat_task() ? 12 : 7);
>> -	saved_reg = regs->regs[*regno];
>> -	regs->regs[*regno] = dir;
>> -
>> -	return saved_reg;
>> -}
>> -
>> -static int report_syscall_entry(struct pt_regs *regs)
>> -{
>> -	unsigned long saved_reg;
>> -	int regno, ret;
>> -
>> -	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_ENTER, &regno);
>> -	ret = ptrace_report_syscall_entry(regs);
>> -	if (ret)
>> -		forget_syscall(regs);
>> -	regs->regs[regno] = saved_reg;
>> -
>> -	return ret;
>> -}
>> -
>> -static void report_syscall_exit(struct pt_regs *regs)
>> -{
>> -	unsigned long saved_reg;
>> -	int regno;
>> -
>> -	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_EXIT, &regno);
>> -	if (!test_thread_flag(TIF_SINGLESTEP)) {
>> -		ptrace_report_syscall_exit(regs, 0);
>> -		regs->regs[regno] = saved_reg;
>> -	} else {
>> -		regs->regs[regno] = saved_reg;
>> -
>> -		/*
>> -		 * Signal a pseudo-step exception since we are stepping but
>> -		 * tracer modifications to the registers may have rewound the
>> -		 * state machine.
>> -		 */
>> -		ptrace_report_syscall_exit(regs, 1);
>> -	}
>> -}
>> -
>> -static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
>> -{
>> -	if (unlikely(audit_context())) {
>> -		unsigned long args[6];
>> -
>> -		syscall_get_arguments(current, regs, args);
>> -		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
>> -	}
>> -}
>> -
>> -int syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long flags)
>> -{
>> -	int ret;
>> -
>> -	if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
>> -		ret = report_syscall_entry(regs);
>> -		if (ret || (flags & _TIF_SYSCALL_EMU))
>> -			return NO_SYSCALL;
>> -	}
>> -
>> -	/* Do the secure computing after ptrace; failures should be fast. */
>> -	if (flags & _TIF_SECCOMP) {
>> -		ret = __secure_computing();
>> -		if (ret == -1)
>> -			return NO_SYSCALL;
>> -	}
>> -
>> -	/* Either of the above might have changed the syscall number */
>> -	syscall = syscall_get_nr(current, regs);
>> -
>> -	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) {
>> -		trace_sys_enter(regs, syscall);
>> -
>> -		/*
>> -		 * Probes or BPF hooks in the tracepoint may have changed the
>> -		 * system call number as well.
>> -		 */
>> -		 syscall = syscall_get_nr(current, regs);
>> -	}
>> -
>> -	syscall_enter_audit(regs, syscall);
>> -
>> -	return ret ? : syscall;
>> -}
>> -
>> -static void syscall_exit_work(struct pt_regs *regs, unsigned long flags)
>> -{
>> -	audit_syscall_exit(regs);
>> -
>> -	if (flags & _TIF_SYSCALL_TRACEPOINT)
>> -		trace_sys_exit(regs, syscall_get_return_value(current, regs));
>> -
>> -	if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
>> -		report_syscall_exit(regs);
>> -}
>> -
>> -void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
>> -{
>> -	unsigned long flags = read_thread_flags();
>> -
>> -	rseq_syscall(regs);
>> -
>> -	if (has_syscall_work(flags) || flags & _TIF_SINGLESTEP)
>> -		syscall_exit_work(regs, flags);
>> -}
> 
> Aside from the small change in arch_ptrace_report_syscall_exit(), these
> look exactly equivalent to the generic functions, so LGTM.
> 
> - Kevin
> 
Re: [PATCH v8 11/12] arm64: entry: Convert to generic entry
Posted by Kevin Brodsky 3 days, 10 hours ago
On 28/11/2025 04:34, Jinjie Ruan wrote:
>
> On 2025/11/27 21:31, Kevin Brodsky wrote:
>> On 26/11/2025 08:14, Jinjie Ruan wrote:
>>> Currently, x86, Riscv, Loongarch use the generic entry which makes
>>> maintainers' work easier and codes more elegant. arm64 has already
>>> switched to the generic IRQ entry, so completely convert arm64 to use
>>> the generic entry infrastructure from kernel/entry/*.
>>>
>>> The changes are below:
>>>  - Remove TIF_SYSCALL_* flag, _TIF_WORK_MASK, _TIF_SYSCALL_WORK,
>> _TIF_WORK_MASK is now removed in patch 1.
>>
>>>    and remove has_syscall_work(), as _TIF_SYSCALL_WORK is equal with
>>>    SYSCALL_WORK_ENTER.
>>>
>>> [...]
>>>
>>> +static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
>>> +							    int step)
>>> +{
>>> +	unsigned long saved_reg;
>>> +	int regno;
>>> +
>>> +	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_EXIT, &regno);
>>> +	if (!step) {
>> A difference I noticed here is that the generic report_single_step()
>> always returns false if SYSCALL_EMU is set. I don't know if the
>> combination of SYSCALL_EMU and SINGLESTEP is meaningful, but if it is
>> then I think that's a behaviour change.
> commit 64eb35f701f0 ("ptrace: Migrate TIF_SYSCALL_EMU to use
> SYSCALL_WORK flag") has changed the following code:
>
> Therefore, the original logic returns false in these cases for
> report_single_step() :
>
> - Only _TIF_SYSCALL_EMU is set.
>
> - Both _TIF_SINGLESTEP and _TIF_SYSCALL_EMU are set.
>
> - Neither TIF_SINGLESTEP nor _TIF_SYSCALL_EMU is set;
>
>
>     #define SYSEMU_STEP    (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)
>
>     static inline bool report_single_step(unsigned long ti_work)
>     {
>        return (ti_work & SYSEMU_STEP) == _TIF_SINGLESTEP;
>     }

The code did look different before this commit, but AFAICT it was
functionally equivalent w.r.t. SYSEMU / SINGLESTEP.

> I think the "returns false if SYSCALL_EMU is set" behaviour is correct
> according to the ptrace(2) man page: both PTRACE_SYSEMU and
> PTRACE_SYSEMU_SINGLESTEP need to report the syscall only once, on syscall
> entry.
>
>     “For PTRACE_SYSEMU, continue and stop on entry to the next
>       system call, which will not be executed.  See the
>       documentation on syscall-stops below.  For
>       PTRACE_SYSEMU_SINGLESTEP, do the same but also singlestep
>       if not a system call. “

That seems sensible (based on my very limited understanding of SYSEMU),
nevertheless it is not what arm64 currently does AFAIU. To follow the
same logic as the rest, this change should be made in a separate patch.

> Link:https://man7.org/linux/man-pages/man2/ptrace.2.html
>
>>> [...]
>>>
>>>  #define TIF_UPROBE		5	/* uprobe breakpoint or singlestep */
>>>  #define TIF_MTE_ASYNC_FAULT	6	/* MTE Asynchronous Tag Check Fault */
>>>  #define TIF_NOTIFY_SIGNAL	7	/* signal notifications exist */
>>> -#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
>>> -#define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
>>> -#define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
>>> -#define TIF_SECCOMP		11	/* syscall secure computing */
>>> -#define TIF_SYSCALL_EMU		12	/* syscall emulation active */
>>> +#define TIF_SECCOMP		11      /* syscall secure computing */
>>> +#define TIF_SYSCALL_EMU		12      /* syscall emulation active */
>> These seem to have reappeared in v8 for some reason?
> v8 add "ARCH_SYSCALL_WORK_EXIT" to be defined as "SECCOMP | SYSCALL_EMU"
>   to keep the arm64 behaviour unchanged as mentioned in v7.

Ah then that is where the issue is, I missed that: surely switching to
generic entry means that we are using SYSCALL_WORK_BIT_* rather than
TIF_* for all these flags?

- Kevin
Re: [PATCH v8 11/12] arm64: entry: Convert to generic entry
Posted by Jinjie Ruan 2 days, 22 hours ago

On 2025/11/28 21:32, Kevin Brodsky wrote:
> On 28/11/2025 04:34, Jinjie Ruan wrote:
>>
>> On 2025/11/27 21:31, Kevin Brodsky wrote:
>>> On 26/11/2025 08:14, Jinjie Ruan wrote:
>>>> Currently, x86, Riscv, Loongarch use the generic entry which makes
>>>> maintainers' work easier and codes more elegant. arm64 has already
>>>> switched to the generic IRQ entry, so completely convert arm64 to use
>>>> the generic entry infrastructure from kernel/entry/*.
>>>>
>>>> The changes are below:
>>>>  - Remove TIF_SYSCALL_* flag, _TIF_WORK_MASK, _TIF_SYSCALL_WORK,
>>> _TIF_WORK_MASK is now removed in patch 1.
>>>
>>>>    and remove has_syscall_work(), as _TIF_SYSCALL_WORK is equal with
>>>>    SYSCALL_WORK_ENTER.
>>>>
>>>> [...]
>>>>
>>>> +static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
>>>> +							    int step)
>>>> +{
>>>> +	unsigned long saved_reg;
>>>> +	int regno;
>>>> +
>>>> +	saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_EXIT, &regno);
>>>> +	if (!step) {
>>> A difference I noticed here is that the generic report_single_step()
>>> always returns false if SYSCALL_EMU is set. I don't know if the
>>> combination of SYSCALL_EMU and SINGLESTEP is meaningful, but if it is
>>> then I think that's a behaviour change.
>> commit 64eb35f701f0 ("ptrace: Migrate TIF_SYSCALL_EMU to use
>> SYSCALL_WORK flag") has changed the following code:
>>
>> Therefore, the original logic returns false in these cases for
>> report_single_step() :
>>
>> - Only _TIF_SYSCALL_EMU is set.
>>
>> - Both _TIF_SINGLESTEP and _TIF_SYSCALL_EMU are set.
>>
>> - Neither TIF_SINGLESTEP nor _TIF_SYSCALL_EMU is set;
>>
>>
>>     #define SYSEMU_STEP    (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)
>>
>>     static inline bool report_single_step(unsigned long ti_work)
>>     {
>>        return (ti_work & SYSEMU_STEP) == _TIF_SINGLESTEP;
>>     }
> 
> The code did look different before this commit, but AFAICT it was
> functionally equivalent w.r.t. SYSEMU / SINGLESTEP.
> 
>> I think the "returns false if SYSCALL_EMU is set" behaviour is correct
>> according to the ptrace(2) man page: both PTRACE_SYSEMU and
>> PTRACE_SYSEMU_SINGLESTEP need to report the syscall only once, on syscall
>> entry.
>>
>>     “For PTRACE_SYSEMU, continue and stop on entry to the next
>>       system call, which will not be executed.  See the
>>       documentation on syscall-stops below.  For
>>       PTRACE_SYSEMU_SINGLESTEP, do the same but also singlestep
>>       if not a system call. “
> 
> That seems sensible (based on my very limited understanding of SYSEMU),
> nevertheless it is not what arm64 currently does AFAIU. To follow the



> same logic as the rest, this change should be made in a separate patch.

Right, and the man page description seems to match the comments of the
report_single_step() function.

"74 /*
 75  * If SYSCALL_EMU is set, then the only reason to report is when
 76  * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP).  This syscall
 77  * instruction has been already reported in syscall_enter_from_user_mode().
 78  */
"

> 
>> Link:https://man7.org/linux/man-pages/man2/ptrace.2.html
>>
>>>> [...]
>>>>
>>>>  #define TIF_UPROBE		5	/* uprobe breakpoint or singlestep */
>>>>  #define TIF_MTE_ASYNC_FAULT	6	/* MTE Asynchronous Tag Check Fault */
>>>>  #define TIF_NOTIFY_SIGNAL	7	/* signal notifications exist */
>>>> -#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
>>>> -#define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
>>>> -#define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
>>>> -#define TIF_SECCOMP		11	/* syscall secure computing */
>>>> -#define TIF_SYSCALL_EMU		12	/* syscall emulation active */
>>>> +#define TIF_SECCOMP		11      /* syscall secure computing */
>>>> +#define TIF_SYSCALL_EMU		12      /* syscall emulation active */
>>> These seem to have reappeared in v8 for some reason?
>> v8 add "ARCH_SYSCALL_WORK_EXIT" to be defined as "SECCOMP | SYSCALL_EMU"
>>   to keep the arm64 behaviour unchanged as mentioned in v7.
> 
> Ah then that is where the issue is, I missed that: surely switching to
> generic entry means that we are using SYSCALL_WORK_BIT_* rather than
> TIF_* for all these flags?

I think these may be the same issue you mentioned in v7: neither
SYSCALL_WORK_EXIT nor report_single_step() excluded SYSCALL_EMU. Maybe
we should clarify both for arm64 together in a separate patch.

1、"The generic report_single_step() always returns false if SYSCALL_EMU
is set."

2、"
     > -void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
     > -{
     > -	unsigned long flags = read_thread_flags();
     > -
     > -	rseq_syscall(regs);
     > -
     > -	if (has_syscall_work(flags) || flags & _TIF_SINGLESTEP)

     I believe switching to the generic function introduces a change
     here: syscall_exit_work() is only called if a flag in
SYSCALL_WORK_EXIT is set, and this set does not include SYSCALL_EMU and
    SECCOMP. Practically this means that audit_syscall_exit() will no
    longer be called if only SECCOMP and/or SYSCALL_EMU is set.

   It doesn't feel like a major behaviour change, but it should be
   pointed out."

> 
> - Kevin
> 
Re: [PATCH v8 11/12] arm64: entry: Convert to generic entry
Posted by Kevin Brodsky 13 hours ago
On 29/11/2025 02:23, Jinjie Ruan wrote:
>>>>>  #define TIF_UPROBE		5	/* uprobe breakpoint or singlestep */
>>>>>  #define TIF_MTE_ASYNC_FAULT	6	/* MTE Asynchronous Tag Check Fault */
>>>>>  #define TIF_NOTIFY_SIGNAL	7	/* signal notifications exist */
>>>>> -#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
>>>>> -#define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
>>>>> -#define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
>>>>> -#define TIF_SECCOMP		11	/* syscall secure computing */
>>>>> -#define TIF_SYSCALL_EMU		12	/* syscall emulation active */
>>>>> +#define TIF_SECCOMP		11      /* syscall secure computing */
>>>>> +#define TIF_SYSCALL_EMU		12      /* syscall emulation active */
>>>> These seem to have reappeared in v8 for some reason?
>>> v8 add "ARCH_SYSCALL_WORK_EXIT" to be defined as "SECCOMP | SYSCALL_EMU"
>>>   to keep the arm64 behaviour unchanged as mentioned in v7.
>> Ah then that is where the issue is, I missed that: surely switching to
>> generic entry means that we are using SYSCALL_WORK_BIT_* rather than
>> TIF_* for all these flags?
> I think they may be the same thing as you mentioned in v7,neither
> SYSCALL_WORK_EXIT nor report_single_step() excluded SYSCALL_EMU, maybe
> we should clarify them for arm64 together in a separate patch.

These two might indeed be related. On second thoughts, while waiting for
more knowledgeable arm64 reviewers, I would suggest aligning arm64 with
the generic entry. Which means...

> 1、"The generic report_single_step() always returns false if SYSCALL_EMU
> is set."

... replicating this behaviour on arm64 (in a separate patch), and...

> 2、"
>      > -void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
>      > -{
>      > -	unsigned long flags = read_thread_flags();
>      > -
>      > -	rseq_syscall(regs);
>      > -
>      > -	if (has_syscall_work(flags) || flags & _TIF_SINGLESTEP)
>
>      I believe switching to the generic function introduces a change
>      here: syscall_exit_work() is only called if a flag in
> SYSCALL_WORK_EXIT is set, and this set does not include SYSCALL_EMU and
>     SECCOMP. Practically this means that audit_syscall_exit() will no
>     longer be called if only SECCOMP and/or SYSCALL_EMU is set.
>
>    It doesn't feel like a major behaviour change, but it should be
>    pointed out."

... replicating this on arm64 as well, i.e. introducing a separate set
of flags for syscall exit. This should be a patch of its own, as it
isn't directly related to the report_single_step() behaviour (especially
since it concerns SECCOMP as well). It would also be an occasion to get
rid of has_syscall_work(), in preparation for the move to generic entry.

- Kevin