Pretty much every caller of is_endbr() actually wants to test something at an
address and ends up doing get_kernel_nofault(). Fold the lot into a more
convenient helper.
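
For illustration (a sketch, not part of the diff below; the caller names are
made up, the helpers are the real ones), the calling pattern this converts:

/* Before: each caller open-codes the nofault fetch and passes the value. */
static bool caller_old(unsigned long addr)
{
	u32 insn;

	if (get_kernel_nofault(insn, (u32 *)addr))
		return false;

	return is_endbr(insn);		/* old is_endbr() takes the u32 value */
}

/* After: is_endbr() takes the address and does the nofault read itself. */
static bool caller_new(unsigned long addr)
{
	return is_endbr((u32 *)addr);
}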
Note: this effectively reverts commit a8497506cd2c ("bpf: Avoid
get_kernel_nofault() to fetch kprobe entry IP") which was entirely the
wrong way to go about doing things. The right solution is to optimize
get_kernel_nofault() itself, it really doesn't need STAC/CLAC nor the
speculation barrier. Using __get_user is a historical hack, not a
requirement.
Cc: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/events/core.c | 2 +-
arch/x86/include/asm/ibt.h | 5 +++--
arch/x86/kernel/alternative.c | 19 +++++++++++++------
arch/x86/kernel/kprobes/core.c | 11 +----------
arch/x86/net/bpf_jit_comp.c | 4 ++--
kernel/trace/bpf_trace.c | 14 ++------------
6 files changed, 22 insertions(+), 33 deletions(-)
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2845,7 +2845,7 @@ static bool is_uprobe_at_func_entry(stru
 		return true;
 
 	/* endbr64 (64-bit only) */
-	if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn))
+	if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
 		return true;
 
 	return false;
--- a/arch/x86/include/asm/ibt.h
+++ b/arch/x86/include/asm/ibt.h
@@ -65,7 +65,7 @@ static __always_inline __attribute_const
 	return 0x001f0f66; /* osp nopl (%rax) */
 }
 
-static inline bool is_endbr(u32 val)
+static inline bool __is_endbr(u32 val)
 {
 	if (val == gen_endbr_poison())
 		return true;
@@ -74,6 +74,7 @@ static inline bool is_endbr(u32 val)
 	return val == gen_endbr();
 }
 
+extern __noendbr bool is_endbr(u32 *val);
 extern __noendbr u64 ibt_save(bool disable);
 extern __noendbr void ibt_restore(u64 save);
 
@@ -102,7 +103,7 @@ extern bool __do_kernel_cp_fault(struct
 
 #define __noendbr
 
-static inline bool is_endbr(u32 val) { return false; }
+static inline bool is_endbr(u32 *val) { return false; }
 
 static inline u64 ibt_save(bool disable) { return 0; }
 static inline void ibt_restore(u64 save) { }
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -852,16 +852,23 @@ void __init_or_module noinline apply_ret
 
 #ifdef CONFIG_X86_KERNEL_IBT
 
+__noendbr bool is_endbr(u32 *val)
+{
+	u32 endbr;
+
+	if (get_kernel_nofault(endbr, val))
+		return false;
+
+	return __is_endbr(endbr);
+}
+
 static void poison_cfi(void *addr);
 
 static void __init_or_module poison_endbr(void *addr, bool warn)
 {
-	u32 endbr, poison = gen_endbr_poison();
-
-	if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
-		return;
+	u32 poison = gen_endbr_poison();
 
-	if (!is_endbr(endbr)) {
+	if (!is_endbr(addr)) {
 		WARN_ON_ONCE(warn);
 		return;
 	}
@@ -988,7 +995,7 @@ static u32 cfi_seed __ro_after_init;
 static u32 cfi_rehash(u32 hash)
 {
 	hash ^= cfi_seed;
-	while (unlikely(is_endbr(hash) || is_endbr(-hash))) {
+	while (unlikely(__is_endbr(hash) || __is_endbr(-hash))) {
 		bool lsb = hash & 1;
 		hash >>= 1;
 		if (lsb)
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -373,16 +373,7 @@ static bool can_probe(unsigned long padd
 kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
 					 bool *on_func_entry)
 {
-	u32 insn;
-
-	/*
-	 * Since 'addr' is not guaranteed to be safe to access, use
-	 * copy_from_kernel_nofault() to read the instruction:
-	 */
-	if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32)))
-		return NULL;
-
-	if (is_endbr(insn)) {
+	if (is_endbr((u32 *)addr)) {
 		*on_func_entry = !offset || offset == 4;
 		if (*on_func_entry)
 			offset = 4;
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -625,7 +625,7 @@ int bpf_arch_text_poke(void *ip, enum bp
 	 * See emit_prologue(), for IBT builds the trampoline hook is preceded
 	 * with an ENDBR instruction.
 	 */
-	if (is_endbr(*(u32 *)ip))
+	if (is_endbr(ip))
 		ip += ENDBR_INSN_SIZE;
 
 	return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
@@ -2971,7 +2971,7 @@ static int __arch_prepare_bpf_trampoline
 		/* skip patched call instruction and point orig_call to actual
 		 * body of the kernel function.
 		 */
-		if (is_endbr(*(u32 *)orig_call))
+		if (is_endbr(orig_call))
 			orig_call += ENDBR_INSN_SIZE;
 		orig_call += X86_PATCH_SIZE;
 	}
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1027,19 +1027,9 @@ static const struct bpf_func_proto bpf_g
 #ifdef CONFIG_X86_KERNEL_IBT
 static unsigned long get_entry_ip(unsigned long fentry_ip)
 {
-	u32 instr;
+	if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE)))
+		return fentry_ip - ENDBR_INSN_SIZE;
 
-	/* We want to be extra safe in case entry ip is on the page edge,
-	 * but otherwise we need to avoid get_kernel_nofault()'s overhead.
-	 */
-	if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
-		if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
-			return fentry_ip;
-	} else {
-		instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
-	}
-	if (is_endbr(instr))
-		fentry_ip -= ENDBR_INSN_SIZE;
 	return fentry_ip;
 }
 #else
On Fri, Sep 27, 2024 at 12:50 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> [...]
>
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1027,19 +1027,9 @@ static const struct bpf_func_proto bpf_g
>  #ifdef CONFIG_X86_KERNEL_IBT
>  static unsigned long get_entry_ip(unsigned long fentry_ip)
>  {
> -	u32 instr;
> +	if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE)))
> +		return fentry_ip - ENDBR_INSN_SIZE;
>
> -	/* We want to be extra safe in case entry ip is on the page edge,
> -	 * but otherwise we need to avoid get_kernel_nofault()'s overhead.
> -	 */
> -	if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
> -		if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
> -			return fentry_ip;
> -	} else {
> -		instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
> -	}
> -	if (is_endbr(instr))
> -		fentry_ip -= ENDBR_INSN_SIZE;
> 	return fentry_ip;

Pls don't.

This re-introduces the overhead that we want to avoid.

Just call __is_endbr() here and keep the optimization.
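
Concretely, that suggestion amounts to keeping the removed fast path and only
switching to the renamed value-based helper; a sketch of the idea, not a
tested patch:

/* kernel/trace/bpf_trace.c, sketch: the fast path from a8497506cd2c kept
 * as-is, with the value check done by __is_endbr() instead of is_endbr().
 */
static unsigned long get_entry_ip(unsigned long fentry_ip)
{
	u32 instr;

	/* Only take the nofault path when the 4-byte read could cross a page. */
	if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
		if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
			return fentry_ip;
	} else {
		instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
	}
	if (__is_endbr(instr))
		fentry_ip -= ENDBR_INSN_SIZE;
	return fentry_ip;
}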
On Sun, Sep 29, 2024 at 10:32:38AM -0700, Alexei Starovoitov wrote:
> On Fri, Sep 27, 2024 at 12:50 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> [...]
>
> Pls don't.
>
> This re-introduces the overhead that we want to avoid.
>
> Just call __is_endbr() here and keep the optimization.

Well, I could do that of course, but as I wrote elsewhere, the right
thing to do is to optimize get_kernel_nofault(); its current
implementation is needlessly expensive. All we really need is a load
with an exception entry, the STAC/CLAC and speculation nonsense should
not be needed.

Fixing get_kernel_nofault() benefits all users.
On Mon, Sep 30, 2024 at 10:30:26AM +0200, Peter Zijlstra wrote:
> On Sun, Sep 29, 2024 at 10:32:38AM -0700, Alexei Starovoitov wrote:
> > On Fri, Sep 27, 2024 at 12:50 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > [...]
> >
> > Pls don't.
> >
> > This re-introduces the overhead that we want to avoid.
> >
> > Just call __is_endbr() here and keep the optimization.
>
> Well, I could do that of course, but as I wrote elsewhere, the right
> thing to do is to optimize get_kernel_nofault(); its current
> implementation is needlessly expensive. All we really need is a load
> with an exception entry, the STAC/CLAC and speculation nonsense should
> not be needed.

Looking at things, something like the below actually generates sane
code:

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a582cd25ca87..84f65ee9736c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1029,17 +1029,10 @@ static unsigned long get_entry_ip(unsigned long fentry_ip)
 {
 	u32 instr;
 
-	/* We want to be extra safe in case entry ip is on the page edge,
-	 * but otherwise we need to avoid get_kernel_nofault()'s overhead.
-	 */
-	if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
-		if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
-			return fentry_ip;
-	} else {
-		instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
-	}
+	__get_kernel_nofault(&instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE), u32, Efault);
 	if (is_endbr(instr))
 		fentry_ip -= ENDBR_INSN_SIZE;
+Efault:
 	return fentry_ip;
 }
 #else

Which then leads to me rewriting the proposed patch as...

---
Subject: x86/ibt: Clean up is_endbr()
From: Peter Zijlstra <peterz@infradead.org>

Pretty much every caller of is_endbr() actually wants to test something at an
address and ends up doing get_kernel_nofault(). Fold the lot into a more
convenient helper.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/events/core.c         |    2 +-
 arch/x86/include/asm/ibt.h     |    5 +++--
 arch/x86/kernel/alternative.c  |   20 ++++++++++++++------
 arch/x86/kernel/kprobes/core.c |   11 +----------
 arch/x86/net/bpf_jit_comp.c    |    4 ++--
 kernel/trace/bpf_trace.c       |   14 ++------------
 6 files changed, 23 insertions(+), 33 deletions(-)

--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2845,7 +2845,7 @@ static bool is_uprobe_at_func_entry(stru
 		return true;
 
 	/* endbr64 (64-bit only) */
-	if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn))
+	if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
 		return true;
 
 	return false;
--- a/arch/x86/include/asm/ibt.h
+++ b/arch/x86/include/asm/ibt.h
@@ -65,7 +65,7 @@ static __always_inline __attribute_const
 	return 0x001f0f66; /* osp nopl (%rax) */
 }
 
-static inline bool is_endbr(u32 val)
+static inline bool __is_endbr(u32 val)
 {
 	if (val == gen_endbr_poison())
 		return true;
@@ -74,6 +74,7 @@ static inline bool is_endbr(u32 val)
 	return val == gen_endbr();
 }
 
+extern __noendbr bool is_endbr(u32 *val);
 extern __noendbr u64 ibt_save(bool disable);
 extern __noendbr void ibt_restore(u64 save);
 
@@ -102,7 +103,7 @@ extern bool __do_kernel_cp_fault(struct
 
 #define __noendbr
 
-static inline bool is_endbr(u32 val) { return false; }
+static inline bool is_endbr(u32 *val) { return false; }
 
 static inline u64 ibt_save(bool disable) { return 0; }
 static inline void ibt_restore(u64 save) { }
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -852,16 +852,24 @@ void __init_or_module noinline apply_ret
 
 #ifdef CONFIG_X86_KERNEL_IBT
 
+__noendbr bool is_endbr(u32 *val)
+{
+	u32 endbr;
+
+	__get_kernel_nofault(&endbr, val, u32, Efault);
+	return __is_endbr(endbr);
+
+Efault:
+	return false;
+}
+
 static void poison_cfi(void *addr);
 
 static void __init_or_module poison_endbr(void *addr, bool warn)
 {
-	u32 endbr, poison = gen_endbr_poison();
-
-	if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
-		return;
+	u32 poison = gen_endbr_poison();
 
-	if (!is_endbr(endbr)) {
+	if (!is_endbr(addr)) {
 		WARN_ON_ONCE(warn);
 		return;
 	}
@@ -988,7 +996,7 @@ static u32 cfi_seed __ro_after_init;
 static u32 cfi_rehash(u32 hash)
 {
 	hash ^= cfi_seed;
-	while (unlikely(is_endbr(hash) || is_endbr(-hash))) {
+	while (unlikely(__is_endbr(hash) || __is_endbr(-hash))) {
 		bool lsb = hash & 1;
 		hash >>= 1;
 		if (lsb)
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -373,16 +373,7 @@ static bool can_probe(unsigned long padd
 kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
 					 bool *on_func_entry)
 {
-	u32 insn;
-
-	/*
-	 * Since 'addr' is not guaranteed to be safe to access, use
-	 * copy_from_kernel_nofault() to read the instruction:
-	 */
-	if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32)))
-		return NULL;
-
-	if (is_endbr(insn)) {
+	if (is_endbr((u32 *)addr)) {
 		*on_func_entry = !offset || offset == 4;
 		if (*on_func_entry)
 			offset = 4;
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -625,7 +625,7 @@ int bpf_arch_text_poke(void *ip, enum bp
 	 * See emit_prologue(), for IBT builds the trampoline hook is preceded
 	 * with an ENDBR instruction.
 	 */
-	if (is_endbr(*(u32 *)ip))
+	if (is_endbr(ip))
 		ip += ENDBR_INSN_SIZE;
 
 	return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
@@ -2971,7 +2971,7 @@ static int __arch_prepare_bpf_trampoline
 		/* skip patched call instruction and point orig_call to actual
 		 * body of the kernel function.
 		 */
-		if (is_endbr(*(u32 *)orig_call))
+		if (is_endbr(orig_call))
 			orig_call += ENDBR_INSN_SIZE;
 		orig_call += X86_PATCH_SIZE;
 	}
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1027,19 +1027,9 @@ static const struct bpf_func_proto bpf_g
 #ifdef CONFIG_X86_KERNEL_IBT
 static unsigned long get_entry_ip(unsigned long fentry_ip)
 {
-	u32 instr;
+	if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE)))
+		return fentry_ip - ENDBR_INSN_SIZE;
 
-	/* We want to be extra safe in case entry ip is on the page edge,
-	 * but otherwise we need to avoid get_kernel_nofault()'s overhead.
-	 */
-	if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
-		if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
-			return fentry_ip;
-	} else {
-		instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
-	}
-	if (is_endbr(instr))
-		fentry_ip -= ENDBR_INSN_SIZE;
 	return fentry_ip;
 }
 #else
On Mon, Sep 30, 2024 at 2:33 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> [...]
>
> Which then leads to me rewriting the proposed patch as...
>
> [...]

LGTM.

Acked-by: Andrii Nakryiko <andrii@kernel.org>
On Mon, Sep 30, 2024 at 2:33 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> Which then leads to me rewriting the proposed patch as...

...

> +__noendbr bool is_endbr(u32 *val)
> +{
> +	u32 endbr;
> +
> +	__get_kernel_nofault(&endbr, val, u32, Efault);
> +	return __is_endbr(endbr);
> +
> +Efault:
> +	return false;
> +}

That looks much better.

Acked-by: Alexei Starovoitov <ast@kernel.org>
On Fri, Sep 27, 2024 at 09:49:03PM +0200, Peter Zijlstra wrote:
> Pretty much every caller of is_endbr() actually wants to test something at an
> address and ends up doing get_kernel_nofault(). Fold the lot into a more
> convenient helper.
>
> Note: this effectively reverts commit a8497506cd2c ("bpf: Avoid
> get_kernel_nofault() to fetch kprobe entry IP") which was entirely the
> wrong way to go about doing things. The right solution is to optimize
> get_kernel_nofault() itself, it really doesn't need STAC/CLAC nor the
> speculation barrier. Using __get_user is a historical hack, not a
> requirement.

But these patches don't actually optimize get_kernel_nofault()?

--
Josh
On Fri, Sep 27, 2024 at 05:04:44PM -0700, Josh Poimboeuf wrote:
> On Fri, Sep 27, 2024 at 09:49:03PM +0200, Peter Zijlstra wrote:
> > [...]
> >
> > Note: this effectively reverts commit a8497506cd2c ("bpf: Avoid
> > get_kernel_nofault() to fetch kprobe entry IP") which was entirely the
> > wrong way to go about doing things. The right solution is to optimize
> > get_kernel_nofault() itself, it really doesn't need STAC/CLAC nor the
> > speculation barrier. Using __get_user is a historical hack, not a
> > requirement.
>
> But these patches don't actually optimize get_kernel_nofault()?

No, I figured there was enough there already. Also, given the state I
was in, I'd probably get it wrong.

I have it on a todo list somewhere though. It shouldn't be too hard.
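
For reference, a rough sketch of that direction, reusing the
__get_kernel_nofault() building block from the patch above; the helper name is
made up and this is an illustration, not the eventual optimization:

/* Sketch: a nofault kernel read that is just a MOV plus an exception-table
 * entry, without the STAC/CLAC and speculation-barrier overhead of the
 * __get_user() route. Same construct as the is_endbr() helper above.
 */
static __always_inline bool read_kernel_u32_nofault(u32 *dst, const u32 *src)
{
	__get_kernel_nofault(dst, src, u32, Efault);
	return true;

Efault:
	return false;
}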