[PATCH bpf-next v3 1/6] ftrace: introduce FTRACE_OPS_FL_JMP

Posted by Menglong Dong 1 week, 6 days ago
Currently, the "nop" is replaced with a "call" instruction when a
function is hooked by ftrace. However, the "call" can sometimes break
the RSB (Return Stack Buffer) and introduce extra overhead. Therefore,
introduce the flag FTRACE_OPS_FL_JMP, which indicates that the
ftrace_ops should be called with a "jmp" instead of a "call". For now,
it is only used for the direct call case.

When a direct ftrace_ops is marked with FTRACE_OPS_FL_JMP, the lowest
bit (LSB) of ops->direct_call will be set to 1. Therefore, we can tell
if we should use "jmp" for the callback in ftrace_call_replace().

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
v3:
- reject addr in register_ftrace_direct() and __modify_ftrace_direct()
  if its "jmp" bit is already set
---
 include/linux/ftrace.h | 33 +++++++++++++++++++++++++++++++++
 kernel/trace/Kconfig   | 12 ++++++++++++
 kernel/trace/ftrace.c  | 17 ++++++++++++++++-
 3 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 07f8c309e432..015dd1049bea 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -359,6 +359,7 @@ enum {
 	FTRACE_OPS_FL_DIRECT			= BIT(17),
 	FTRACE_OPS_FL_SUBOP			= BIT(18),
 	FTRACE_OPS_FL_GRAPH			= BIT(19),
+	FTRACE_OPS_FL_JMP			= BIT(20),
 };
 
 #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -577,6 +578,38 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
 						 unsigned long addr) { }
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
+static inline bool ftrace_is_jmp(unsigned long addr)
+{
+	return addr & 1;
+}
+
+static inline unsigned long ftrace_jmp_set(unsigned long addr)
+{
+	return addr | 1UL;
+}
+
+static inline unsigned long ftrace_jmp_get(unsigned long addr)
+{
+	return addr & ~1UL;
+}
+#else
+static inline bool ftrace_is_jmp(unsigned long addr)
+{
+	return false;
+}
+
+static inline unsigned long ftrace_jmp_set(unsigned long addr)
+{
+	return addr;
+}
+
+static inline unsigned long ftrace_jmp_get(unsigned long addr)
+{
+	return addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_JMP */
+
 #ifdef CONFIG_STACK_TRACER
 
 int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d2c79da81e4f..4661b9e606e0 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -80,6 +80,12 @@ config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
 	  If the architecture generates __patchable_function_entries sections
 	  but does not want them included in the ftrace locations.
 
+config HAVE_DYNAMIC_FTRACE_WITH_JMP
+	bool
+	help
+	  If the architecture supports replacing the __fentry__ call
+	  with a "jmp" instruction.
+
 config HAVE_SYSCALL_TRACEPOINTS
 	bool
 	help
@@ -330,6 +336,12 @@ config DYNAMIC_FTRACE_WITH_ARGS
 	depends on DYNAMIC_FTRACE
 	depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS
 
+config DYNAMIC_FTRACE_WITH_JMP
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+	depends on HAVE_DYNAMIC_FTRACE_WITH_JMP
+
 config FPROBE
 	bool "Kernel Function Probe (fprobe)"
 	depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 59cfacb8a5bb..bbb37c0f8c6c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5951,7 +5951,8 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
 	for (i = 0; i < size; i++) {
 		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
 			del = __ftrace_lookup_ip(direct_functions, entry->ip);
-			if (del && del->direct == addr) {
+			if (del && ftrace_jmp_get(del->direct) ==
+				   ftrace_jmp_get(addr)) {
 				remove_hash_entry(direct_functions, del);
 				kfree(del);
 			}
@@ -6016,8 +6017,15 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 	if (ftrace_hash_empty(hash))
 		return -EINVAL;
 
+	/* This is a "raw" address, and this should never happen. */
+	if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
+		return -EINVAL;
+
 	mutex_lock(&direct_mutex);
 
+	if (ops->flags & FTRACE_OPS_FL_JMP)
+		addr = ftrace_jmp_set(addr);
+
 	/* Make sure requested entries are not already registered.. */
 	size = 1 << hash->size_bits;
 	for (i = 0; i < size; i++) {
@@ -6138,6 +6146,13 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 
 	lockdep_assert_held_once(&direct_mutex);
 
+	/* This is a "raw" address, and this should never happen. */
+	if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
+		return -EINVAL;
+
+	if (ops->flags & FTRACE_OPS_FL_JMP)
+		addr = ftrace_jmp_set(addr);
+
 	/* Enable the tmp_ops to have the same functions as the direct ops */
 	ftrace_ops_init(&tmp_ops);
 	tmp_ops.func_hash = ops->func_hash;
-- 
2.51.2
Re: [PATCH bpf-next v3 1/6] ftrace: introduce FTRACE_OPS_FL_JMP
Posted by bot+bpf-ci@kernel.org 1 week, 6 days ago
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 07f8c309e..015dd1049 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h

[ ... ]

> @@ -577,6 +578,38 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
>  						 unsigned long addr) { }
>  #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
> +static inline bool ftrace_is_jmp(unsigned long addr)
> +{
> +	return addr & 1;
> +}
> +
> +static inline unsigned long ftrace_jmp_set(unsigned long addr)
> +{
> +	return addr | 1UL;
> +}
> +
> +static inline unsigned long ftrace_jmp_get(unsigned long addr)
> +{
> +	return addr & ~1UL;
> +}
> +#else
> +static inline bool ftrace_is_jmp(unsigned long addr)
> +{
> +	return false;
> +}
> +
> +static inline unsigned long ftrace_jmp_set(unsigned long addr)
> +{
> +	return addr;
> +}
> +
> +static inline unsigned long ftrace_jmp_get(unsigned long addr)
> +{
> +	return addr;
> +}
> +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_JMP */

[ ... ]

> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 59cfacb8a..bbb37c0f8 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c

[ ... ]

> @@ -5951,7 +5951,8 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
>  	for (i = 0; i < size; i++) {
>  		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
>  			del = __ftrace_lookup_ip(direct_functions, entry->ip);
> -			if (del && del->direct == addr) {
> +			if (del && ftrace_jmp_get(del->direct) ==
> +				   ftrace_jmp_get(addr)) {
>  				remove_hash_entry(direct_functions, del);
>  				kfree(del);
>  			}

[ ... ]

> @@ -6016,8 +6017,15 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
>  	if (ftrace_hash_empty(hash))
>  		return -EINVAL;
>
> +	/* This is a "raw" address, and this should never happen. */
> +	if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
> +		return -EINVAL;
> +
>  	mutex_lock(&direct_mutex);
>
> +	if (ops->flags & FTRACE_OPS_FL_JMP)
> +		addr = ftrace_jmp_set(addr);
> +
>  	/* Make sure requested entries are not already registered.. */

[ ... ]

>  	/* ... and add the new entries */
>  	size = 1 << hash->size_bits;
>  	for (i = 0; i < size; i++) {
>  		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
>  			new = add_hash_entry(new_hash, entry->ip);
>  			if (!new)
>  				goto out_unlock;
>  			/* Update both the copy and the hash entry */
>  			new->direct = addr;
>  			entry->direct = addr;
>  		}
>  	}

After addr is potentially encoded with ftrace_jmp_set(), it gets stored in
entry->direct. However, ftrace_find_rec_direct() returns entry->direct
without masking:

unsigned long ftrace_find_rec_direct(unsigned long ip)
{
	struct ftrace_func_entry *entry;

	entry = __ftrace_lookup_ip(direct_functions, ip);
	if (!entry)
		return 0;

	return entry->direct;
}

This encoded address flows to two consumers:

1. ftrace_get_addr_new() and ftrace_get_addr_curr() return it directly,
   which eventually reaches architecture-specific ftrace_call_replace().
   Can the architecture code handle addresses with the LSB set?

2. The debug output in ftrace_enabled_show() in kernel/trace/ftrace.c:
   seq_printf(m, "\n\tdirect-->%pS", (void *)direct);

   Won't this print the wrong symbol due to the LSB being set?
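
   If masking is the right answer here, a hypothetical one-line fix
   would be:

   seq_printf(m, "\n\tdirect-->%pS", (void *)ftrace_jmp_get(direct));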

>  	ops->func = call_direct_funcs;
>  	ops->flags = MULTI_FLAGS;
>  	ops->trampoline = FTRACE_REGS_ADDR;
>  	ops->direct_call = addr;

Similarly, ops->direct_call stores the encoded address. In
call_direct_funcs():

static void call_direct_funcs(unsigned long ip, unsigned long pip,
			      struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
	unsigned long addr = READ_ONCE(ops->direct_call);

	if (!addr)
		return;

	arch_ftrace_set_direct_caller(fregs, addr);
}

The encoded address is passed directly to arch_ftrace_set_direct_caller()
without masking. Looking at arch implementations like x86:

static inline void
__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
{
	/* Emulate a call */
	regs->orig_ax = addr;
}

Does arch_ftrace_set_direct_caller() expect addresses with the LSB set?
The implementations directly store the address into registers without any
masking.

The commit message says "we can tell if we should use 'jmp' for the
callback in ftrace_call_replace()", but no architecture code is updated
to check the LSB. Should ftrace_find_rec_direct() and call_direct_funcs()
mask the JMP bit before returning addresses to architecture code?
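
If so, the masked variants would presumably look like this (sketch only,
not part of this patch):

unsigned long ftrace_find_rec_direct(unsigned long ip)
{
	struct ftrace_func_entry *entry;

	entry = __ftrace_lookup_ip(direct_functions, ip);
	if (!entry)
		return 0;

	/* hypothetical: strip the JMP marker before returning */
	return ftrace_jmp_get(entry->direct);
}

and, in call_direct_funcs():

	/* hypothetical: mask the marker before handing it to the arch */
	arch_ftrace_set_direct_caller(fregs, ftrace_jmp_get(addr));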

> @@ -6136,8 +6146,13 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
>
>  	lockdep_assert_held_once(&direct_mutex);
>
> +	/* This is a "raw" address, and this should never happen. */
> +	if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
> +		return -EINVAL;
> +
> +	if (ops->flags & FTRACE_OPS_FL_JMP)
> +		addr = ftrace_jmp_set(addr);

The same encoding happens here, with the same downstream flow issues.


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/19466635856
Re: [PATCH bpf-next v3 1/6] ftrace: introduce FTRACE_OPS_FL_JMP
Posted by Steven Rostedt 1 week, 6 days ago
On Tue, 18 Nov 2025 13:25:04 +0000 (UTC)
bot+bpf-ci@kernel.org wrote:

> The commit message says "we can tell if we should use 'jmp' for the
> callback in ftrace_call_replace()", but no architecture code is updated
> to check the LSB. Should ftrace_find_rec_direct() and call_direct_funcs()
> mask the JMP bit before returning addresses to architecture code?

I guess AI isn't smart enough to know about kernel config options yet.

> +config DYNAMIC_FTRACE_WITH_JMP
> +	def_bool y
> +	depends on DYNAMIC_FTRACE
> +	depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> +	depends on HAVE_DYNAMIC_FTRACE_WITH_JMP

Where this code is only implemented when HAVE_DYNAMIC_FTRACE_WITH_JMP is
set.
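
(Without CONFIG_DYNAMIC_FTRACE_WITH_JMP, the helpers introduced by this
patch compile to no-ops, e.g.:

static inline unsigned long ftrace_jmp_set(unsigned long addr)
{
	/* !CONFIG_DYNAMIC_FTRACE_WITH_JMP: return addr unchanged */
	return addr;
}

so the LSB is never set on architectures that have not opted in.)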

-- Steve