From: Jinjie Ruan
Subject: [PATCH -next v5 22/22] arm64: entry: Convert to generic entry
Date: Fri, 6 Dec 2024 18:17:44 +0800
Message-ID: <20241206101744.4161990-23-ruanjinjie@huawei.com>
In-Reply-To: <20241206101744.4161990-1-ruanjinjie@huawei.com>
References: <20241206101744.4161990-1-ruanjinjie@huawei.com>

Currently, x86, RISC-V and LoongArch use the generic entry code. Convert
arm64 to use the generic entry infrastructure from kernel/entry/*. The
generic entry code makes maintenance easier and the code cleaner.

The changes are:

- Remove the TIF_SYSCALL_* flags, _TIF_WORK_MASK and _TIF_SYSCALL_WORK.

- Remove syscall_trace_enter()/syscall_trace_exit() and use the identical
  generic functions instead.

Tested OK with the following test cases on a QEMU virt platform:

- Perf tests.
- Different `dynamic preempt` mode switches.
- Pseudo-NMI tests.
- stress-ng CPU stress test.
- The MTE test case in
  Documentation/arch/arm64/memory-tagging-extension.rst and all test
  cases in tools/testing/selftests/arm64/mte/*.
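As an illustrative aside (not taken from this patch), a syscall path wired
to the generic entry layer typically has the following shape.
syscall_enter_from_user_mode() and syscall_exit_to_user_mode() are the
generic helpers from include/linux/entry-common.h; do_el0_syscall() and
arch_dispatch_syscall() below are hypothetical placeholders for the arch
side:

#include <linux/entry-common.h>

/* Hypothetical stand-in for the arch's syscall-table dispatch. */
static void arch_dispatch_syscall(struct pt_regs *regs, long scno);

/* Minimal sketch of a generic-entry syscall path; not the actual arm64 code. */
static void do_el0_syscall(struct pt_regs *regs, long scno)
{
	/*
	 * Performs the lockdep/RCU/context-tracking entry work and the
	 * SYSCALL_WORK_* enter hooks (ptrace, seccomp, tracepoints, audit),
	 * returning the possibly-rewritten syscall number or -1 to skip.
	 */
	scno = syscall_enter_from_user_mode(regs, scno);

	if (scno != -1L)
		arch_dispatch_syscall(regs, scno);

	/* Runs the SYSCALL_WORK_* exit hooks and the exit-to-user work loop. */
	syscall_exit_to_user_mode(regs);
}

With the generic hooks handling the enter/exit work, the per-arch
TIF_SYSCALL_* bookkeeping becomes unnecessary, which is what the diff
below removes.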
Suggested-by: Mark Rutland
Signed-off-by: Jinjie Ruan
---
v5:
- Rebased on the previous patch update.
- Define ARCH_SYSCALL_WORK_EXIT.
---
 arch/arm64/Kconfig                    |   2 +-
 arch/arm64/include/asm/entry-common.h |  70 ++++++++++++++
 arch/arm64/include/asm/syscall.h      |   7 +-
 arch/arm64/include/asm/thread_info.h  |  23 +----
 arch/arm64/kernel/debug-monitors.c    |   7 ++
 arch/arm64/kernel/ptrace.c            | 134 --------------------------
 arch/arm64/kernel/signal.c            |   2 +-
 arch/arm64/kernel/syscall.c           |   6 +-
 8 files changed, 87 insertions(+), 164 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3751ab9f2a21..a1d96712428e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -148,9 +148,9 @@ config ARM64
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_ENTRY
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IOREMAP
-	select GENERIC_IRQ_ENTRY
 	select GENERIC_IRQ_IPI
 	select GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD
 	select GENERIC_IRQ_PROBE
diff --git a/arch/arm64/include/asm/entry-common.h b/arch/arm64/include/asm/entry-common.h
index 1cc9d966a6c3..6082393c61f2 100644
--- a/arch/arm64/include/asm/entry-common.h
+++ b/arch/arm64/include/asm/entry-common.h
@@ -10,6 +10,12 @@
 #include
 #include
 
+enum ptrace_syscall_dir {
+	PTRACE_SYSCALL_ENTER = 0,
+	PTRACE_SYSCALL_EXIT,
+};
+
+#define ARCH_SYSCALL_WORK_EXIT (SYSCALL_WORK_SECCOMP | SYSCALL_WORK_SYSCALL_EMU)
 #define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_MTE_ASYNC_FAULT | _TIF_FOREIGN_FPSTATE)
 
 static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
@@ -61,4 +67,68 @@ static inline bool arch_irqentry_exit_need_resched(void)
 
 #define arch_irqentry_exit_need_resched arch_irqentry_exit_need_resched
 
+static inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs)
+{
+	unsigned long saved_reg;
+	int regno, ret;
+
+	/*
+	 * We have some ABI weirdness here in the way that we handle syscall
+	 * exit stops because we indicate whether or not the stop has been
+	 * signalled from syscall entry or syscall exit by clobbering a general
+	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
+	 * and restoring its old value after the stop. This means that:
+	 *
+	 * - Any writes by the tracer to this register during the stop are
+	 *   ignored/discarded.
+	 *
+	 * - The actual value of the register is not available during the stop,
+	 *   so the tracer cannot save it and restore it later.
+	 *
+	 * - Syscall stops behave differently to seccomp and pseudo-step traps
+	 *   (the latter do not nobble any registers).
+	 */
+	regno = (is_compat_task() ? 12 : 7);
+	saved_reg = regs->regs[regno];
+	regs->regs[regno] = PTRACE_SYSCALL_ENTER;
+
+	ret = ptrace_report_syscall_entry(regs);
+	if (ret)
+		forget_syscall(regs);
+
+	regs->regs[regno] = saved_reg;
+
+	return ret;
+}
+
+#define arch_ptrace_report_syscall_entry arch_ptrace_report_syscall_entry
+
+static inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
+						   int step)
+{
+	unsigned long saved_reg;
+	int regno;
+
+	/* See comment for arch_ptrace_report_syscall_entry() */
+	regno = (is_compat_task() ? 12 : 7);
+	saved_reg = regs->regs[regno];
+	regs->regs[regno] = PTRACE_SYSCALL_EXIT;
+
+	if (!test_thread_flag(TIF_SINGLESTEP)) {
+		ptrace_report_syscall_exit(regs, 0);
+		regs->regs[regno] = saved_reg;
+	} else {
+		regs->regs[regno] = saved_reg;
+
+		/*
+		 * Signal a pseudo-step exception since we are stepping but
+		 * tracer modifications to the registers may have rewound the
+		 * state machine.
+		 */
+		ptrace_report_syscall_exit(regs, 1);
+	}
+}
+
+#define arch_ptrace_report_syscall_exit arch_ptrace_report_syscall_exit
+
 #endif /* _ASM_ARM64_ENTRY_COMMON_H */
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 6eeb1e7b033f..9891b15da4c3 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -85,12 +85,9 @@ static inline int syscall_get_arch(struct task_struct *task)
 	return AUDIT_ARCH_AARCH64;
 }
 
-static inline bool has_syscall_work(unsigned long flags)
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
 {
-	return unlikely(flags & _TIF_SYSCALL_WORK);
+	return false;
 }
 
-int syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long flags);
-void syscall_exit_to_user_mode_prepare(struct pt_regs *regs);
-
 #endif /* __ASM_SYSCALL_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 1114c1c3300a..543fdb00d713 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -43,6 +43,7 @@ struct thread_info {
 	void			*scs_sp;
 #endif
 	u32			cpu;
+	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
 };
 
 #define thread_saved_pc(tsk)	\
@@ -64,11 +65,6 @@ void arch_setup_new_exec(void);
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
 #define TIF_MTE_ASYNC_FAULT	5	/* MTE Asynchronous Tag Check Fault */
 #define TIF_NOTIFY_SIGNAL	6	/* signal notifications exist */
-#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
-#define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
-#define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
-#define TIF_SECCOMP		11	/* syscall secure computing */
-#define TIF_SYSCALL_EMU		12	/* syscall emulation active */
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_FREEZE		19
 #define TIF_RESTORE_SIGMASK	20
@@ -87,28 +83,13 @@ void arch_setup_new_exec(void);
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
-#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
-#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
-#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
-#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
-#define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
-#define _TIF_UPROBE		(1 << TIF_UPROBE)
-#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+#define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_SVE		(1 << TIF_SVE)
 #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_TSC_SIGSEGV	(1 << TIF_TSC_SIGSEGV)
 
-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-				 _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
-				 _TIF_NOTIFY_SIGNAL)
-
-#define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
-				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
-				 _TIF_SYSCALL_EMU)
-
 #ifdef CONFIG_SHADOW_CALL_STACK
 #define INIT_SCS							\
 	.scs_base	= init_shadow_call_stack,			\
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 460c09d03a73..95b70555a1a8 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -452,11 +452,18 @@ void user_enable_single_step(struct task_struct *task)
 
 	if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP))
 		set_regs_spsr_ss(task_pt_regs(task));
+
+	/*
+	 * Ensure that a trap is triggered once stepping out of a system
+	 * call prior to executing any user instruction.
+	 */
+	set_task_syscall_work(task, SYSCALL_EXIT_TRAP);
 }
 NOKPROBE_SYMBOL(user_enable_single_step);
 
 void user_disable_single_step(struct task_struct *task)
 {
 	clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
+	clear_task_syscall_work(task, SYSCALL_EXIT_TRAP);
 }
 NOKPROBE_SYMBOL(user_disable_single_step);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 3a7a1eaca0a9..a09058b9b7fb 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -42,9 +42,6 @@
 #include
 #include
 
-#define CREATE_TRACE_POINTS
-#include
-
 struct pt_regs_offset {
 	const char *name;
 	int offset;
@@ -2293,137 +2290,6 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ptrace_request(child, request, addr, data);
 }
 
-enum ptrace_syscall_dir {
-	PTRACE_SYSCALL_ENTER = 0,
-	PTRACE_SYSCALL_EXIT,
-};
-
-static int report_syscall_enter(struct pt_regs *regs)
-{
-	unsigned long saved_reg;
-	int regno, ret;
-
-	/*
-	 * We have some ABI weirdness here in the way that we handle syscall
-	 * exit stops because we indicate whether or not the stop has been
-	 * signalled from syscall entry or syscall exit by clobbering a general
-	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
-	 * and restoring its old value after the stop. This means that:
-	 *
-	 * - Any writes by the tracer to this register during the stop are
-	 *   ignored/discarded.
-	 *
-	 * - The actual value of the register is not available during the stop,
-	 *   so the tracer cannot save it and restore it later.
-	 *
-	 * - Syscall stops behave differently to seccomp and pseudo-step traps
-	 *   (the latter do not nobble any registers).
-	 */
-	regno = (is_compat_task() ? 12 : 7);
-	saved_reg = regs->regs[regno];
-	regs->regs[regno] = PTRACE_SYSCALL_ENTER;
-
-	ret = ptrace_report_syscall_entry(regs);
-	if (ret)
-		forget_syscall(regs);
-
-	regs->regs[regno] = saved_reg;
-
-	return ret;
-}
-
-static void report_syscall_exit(struct pt_regs *regs)
-{
-	int regno;
-	unsigned long saved_reg;
-
-	/* See comment for report_syscall_enter() */
-	regno = (is_compat_task() ? 12 : 7);
-	saved_reg = regs->regs[regno];
-	regs->regs[regno] = PTRACE_SYSCALL_EXIT;
-
-	if (!test_thread_flag(TIF_SINGLESTEP)) {
-		ptrace_report_syscall_exit(regs, 0);
-		regs->regs[regno] = saved_reg;
-	} else {
-		regs->regs[regno] = saved_reg;
-
-		/*
-		 * Signal a pseudo-step exception since we are stepping but
-		 * tracer modifications to the registers may have rewound the
-		 * state machine.
-		 */
-		ptrace_report_syscall_exit(regs, 1);
-	}
-}
-
-static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
-{
-	if (unlikely(audit_context())) {
-		unsigned long args[6];
-
-		syscall_get_arguments(current, regs, args);
-		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
-	}
-
-}
-
-int syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long flags)
-{
-	int ret;
-
-	if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
-		ret = report_syscall_enter(regs);
-		if (ret || (flags & _TIF_SYSCALL_EMU))
-			return NO_SYSCALL;
-	}
-
-	/* Do the secure computing after ptrace; failures should be fast. */
-	if (flags & _TIF_SECCOMP) {
-		ret = __secure_computing(NULL);
-		if (ret == -1L)
-			return NO_SYSCALL;
-	}
-
-	/* Either of the above might have changed the syscall number */
-	syscall = syscall_get_nr(current, regs);
-
-	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) {
-		trace_sys_enter(regs, syscall);
-
-		/*
-		 * Probes or BPF hooks in the tracepoint may have changed the
-		 * system call number as well.
-		 */
-		syscall = syscall_get_nr(current, regs);
-	}
-
-	syscall_enter_audit(regs, syscall);
-
-	return regs->syscallno;
-}
-
-static void syscall_trace_exit(struct pt_regs *regs, unsigned long flags)
-{
-	audit_syscall_exit(regs);
-
-	if (flags & _TIF_SYSCALL_TRACEPOINT)
-		trace_sys_exit(regs, syscall_get_return_value(current, regs));
-
-	if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
-		report_syscall_exit(regs);
-}
-
-void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
-{
-	unsigned long flags = read_thread_flags();
-
-	rseq_syscall(regs);
-
-	if (has_syscall_work(flags) || flags & _TIF_SINGLESTEP)
-		syscall_trace_exit(regs, flags);
-}
-
 /*
  * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a.
  * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 84b6628647c7..6cc8fe19e6a0 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -8,8 +8,8 @@
 
 #include
 #include
+#include
 #include
-#include
 #include
 #include
 #include
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index a50db885fc34..5aa585111c4b 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -2,6 +2,7 @@
 
 #include
 #include
+#include
 #include
 #include
 #include
@@ -68,6 +69,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
 static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 			   const syscall_fn_t syscall_table[])
 {
+	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
 	unsigned long flags = read_thread_flags();
 
 	regs->orig_x0 = regs->regs[0];
@@ -101,7 +103,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 		return;
 	}
 
-	if (has_syscall_work(flags)) {
+	if (has_syscall_work(work)) {
 		/*
 		 * The de-facto standard way to skip a system call using ptrace
 		 * is to set the system call to -1 (NO_SYSCALL) and set x0 to a
@@ -119,7 +121,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 		 */
 		if (scno == NO_SYSCALL)
			syscall_set_return_value(current, regs, -ENOSYS, 0);
-		scno = syscall_trace_enter(regs, regs->syscallno, flags);
+		scno = syscall_trace_enter(regs, regs->syscallno, work);
 		if (scno == NO_SYSCALL) {
 			syscall_exit_to_user_mode_prepare(regs);
 			return;
-- 
2.34.1