Currently, the "nop" at a function's entry is replaced with a "call"
instruction when the function is hooked by ftrace. However, the "call"
can sometimes break the Return Stack Buffer (RSB) prediction and
introduce extra overhead. Therefore, introduce the flag
FTRACE_OPS_FL_JMP, which indicates that the ftrace_ops should be
invoked with a "jmp" instead of a "call". For now, it is only used by
the direct call case.

When a direct ftrace_ops is marked with FTRACE_OPS_FL_JMP, the lowest
bit of ops->direct_call will be set to 1. Therefore, we can tell if we
should use "jmp" for the callback in ftrace_call_replace().
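For illustration, a minimal usage sketch (the ops, trampoline and
function names below are made up, and hash setup via
ftrace_set_filter_ip() is omitted):

	/* Sketch: ask for this direct ops to be attached with "jmp". */
	static struct ftrace_ops my_ops = {
		.flags = FTRACE_OPS_FL_JMP,
	};

	static int attach_with_jmp(unsigned long trampoline)
	{
		/*
		 * register_ftrace_direct() sees FTRACE_OPS_FL_JMP and
		 * stores ftrace_jmp_set(trampoline), i.e. trampoline | 1UL,
		 * in ops->direct_call; consumers can test the bit with
		 * ftrace_is_jmp() and recover the real trampoline address
		 * with ftrace_jmp_get().
		 */
		return register_ftrace_direct(&my_ops, trampoline);
	}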
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
include/linux/ftrace.h | 33 +++++++++++++++++++++++++++++++++
kernel/trace/Kconfig | 12 ++++++++++++
kernel/trace/ftrace.c | 9 ++++++++-
3 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7ded7df6e9b5..14705dec1b08 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -351,6 +351,7 @@ enum {
FTRACE_OPS_FL_DIRECT = BIT(17),
FTRACE_OPS_FL_SUBOP = BIT(18),
FTRACE_OPS_FL_GRAPH = BIT(19),
+ FTRACE_OPS_FL_JMP = BIT(20),
};
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -569,6 +570,38 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
unsigned long addr) { }
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
+static inline bool ftrace_is_jmp(unsigned long addr)
+{
+ return addr & 1;
+}
+
+static inline unsigned long ftrace_jmp_set(unsigned long addr)
+{
+ return addr | 1UL;
+}
+
+static inline unsigned long ftrace_jmp_get(unsigned long addr)
+{
+ return addr & ~1UL;
+}
+#else
+static inline bool ftrace_is_jmp(unsigned long addr)
+{
+ return false;
+}
+
+static inline unsigned long ftrace_jmp_set(unsigned long addr)
+{
+ return addr;
+}
+
+static inline unsigned long ftrace_jmp_get(unsigned long addr)
+{
+ return addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_JMP */
+
#ifdef CONFIG_STACK_TRACER
int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d2c79da81e4f..4661b9e606e0 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -80,6 +80,12 @@ config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
If the architecture generates __patchable_function_entries sections
but does not want them included in the ftrace locations.
+config HAVE_DYNAMIC_FTRACE_WITH_JMP
+ bool
+ help
+ If the architecture supports replacing the __fentry__ call with a
+ "jmp" instruction.
+
config HAVE_SYSCALL_TRACEPOINTS
bool
help
@@ -330,6 +336,12 @@ config DYNAMIC_FTRACE_WITH_ARGS
depends on DYNAMIC_FTRACE
depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS
+config DYNAMIC_FTRACE_WITH_JMP
+ def_bool y
+ depends on DYNAMIC_FTRACE
+ depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ depends on HAVE_DYNAMIC_FTRACE_WITH_JMP
+
config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index efb5ce32298f..8d7b2a7f4b15 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5938,7 +5938,8 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
for (i = 0; i < size; i++) {
hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
del = __ftrace_lookup_ip(direct_functions, entry->ip);
- if (del && del->direct == addr) {
+ if (del && ftrace_jmp_get(del->direct) ==
+ ftrace_jmp_get(addr)) {
remove_hash_entry(direct_functions, del);
kfree(del);
}
@@ -5994,6 +5995,9 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
mutex_lock(&direct_mutex);
+ if (ops->flags & FTRACE_OPS_FL_JMP)
+ addr = ftrace_jmp_set(addr);
+
/* Make sure requested entries are not already registered.. */
size = 1 << hash->size_bits;
for (i = 0; i < size; i++) {
@@ -6117,6 +6121,9 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
lockdep_assert_held_once(&direct_mutex);
+ if (ops->flags & FTRACE_OPS_FL_JMP)
+ addr = ftrace_jmp_set(addr);
+
/* Enable the tmp_ops to have the same functions as the direct ops */
ftrace_ops_init(&tmp_ops);
tmp_ops.func_hash = ops->func_hash;
--
2.51.2
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 7ded7df6e..14705dec1 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -351,6 +351,7 @@ enum {
> FTRACE_OPS_FL_DIRECT = BIT(17),
> FTRACE_OPS_FL_SUBOP = BIT(18),
> FTRACE_OPS_FL_GRAPH = BIT(19),
> + FTRACE_OPS_FL_JMP = BIT(20),
> };
>
> #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
[ ... ]
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
> +static inline bool ftrace_is_jmp(unsigned long addr)
> +{
> + return addr & 1;
> +}
> +
> +static inline unsigned long ftrace_jmp_set(unsigned long addr)
> +{
> + return addr | 1UL;
> +}
> +
> +static inline unsigned long ftrace_jmp_get(unsigned long addr)
> +{
> + return addr & ~1UL;
> +}
[ ... ]
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 42bd2ba68..b143a79e6 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -5938,7 +5938,8 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
> for (i = 0; i < size; i++) {
> hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
> del = __ftrace_lookup_ip(direct_functions, entry->ip);
> - if (del && del->direct == addr) {
> + if (del && ftrace_jmp_get(del->direct) ==
> + ftrace_jmp_get(addr)) {
> remove_hash_entry(direct_functions, del);
> kfree(del);
> }
[ ... ]
> @@ -5994,6 +5995,9 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
>
> mutex_lock(&direct_mutex);
>
> + if (ops->flags & FTRACE_OPS_FL_JMP)
> + addr = ftrace_jmp_set(addr);
> +
When FTRACE_OPS_FL_JMP is set, this encodes the flag into the last bit
of addr. The modified addr is then stored in entry->direct and
ops->direct_call. However, this flagged address later flows through
ftrace_find_rec_direct()->ftrace_get_addr_new()->ftrace_make_call()->
ftrace_call_replace() without being masked.
Can ftrace_call_replace() generate correct CALL instructions when addr
has the LSB set? On x86, ftrace_call_replace() calls text_gen_insn()
with the raw address to generate a relative CALL instruction. If addr
has bit 0 set to 1, this would generate a CALL to an odd address,
causing execution to skip the first instruction byte at the trampoline.
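If JMP emission is the plan, the kind of change that appears to be
missing would look roughly like this (hypothetical sketch, not part of
this patch; opcode macros as in x86's <asm/text-patching.h>):

	static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
	{
		/* Strip the flag bit and emit a JMP instead of a CALL. */
		if (ftrace_is_jmp(addr))
			return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip,
					     (void *)ftrace_jmp_get(addr));

		return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
	}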
Similarly, at runtime call_direct_funcs() reads ops->direct_call and
passes it to arch_ftrace_set_direct_caller() without masking. The x86
assembly then does RET to this address, which would jump to addr+1 if
the LSB is set.
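Likewise, call_direct_funcs() would presumably need to mask the bit
before handing the address to the arch (hypothetical sketch; signature
as in current kernels):

	static void call_direct_funcs(unsigned long ip, unsigned long pip,
				      struct ftrace_ops *ops, struct ftrace_regs *fregs)
	{
		unsigned long addr = READ_ONCE(ops->direct_call);

		if (!addr)
			return;

		/* Drop the FTRACE_OPS_FL_JMP bit before the arch uses addr. */
		arch_ftrace_set_direct_caller(fregs, ftrace_jmp_get(addr));
	}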
The commit message says "we can tell if we should use 'jmp' for the
callback in ftrace_call_replace()" but ftrace_call_replace() isn't
modified to check or mask the bit. Are there missing changes to handle
the encoded address in these paths?
> /* Make sure requested entries are not already registered.. */
> size = 1 << hash->size_bits;
> for (i = 0; i < size; i++) {
[ ... ]
> @@ -6117,6 +6121,9 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
>
> lockdep_assert_held_once(&direct_mutex);
>
> + if (ops->flags & FTRACE_OPS_FL_JMP)
> + addr = ftrace_jmp_set(addr);
> +
Same issue here: the flagged addr is stored, but downstream consumers
don't mask it before using it as a jump target.
---
AI reviewed your patch. Please fix the bug, or reply explaining why it is not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/19360353328