[PATCH RFC bpf-next 1/7] ftrace: introduce FTRACE_OPS_FL_JMP

Menglong Dong posted 7 patches 2 months, 3 weeks ago
There is a newer version of this series
For now, the "nop" will be replaced with a "call" instruction when a
function is hooked by the ftrace. However, sometimes the "call" can break
the RSB and introduce extra overhead. Therefore, introduce the flag
FTRACE_OPS_FL_JMP, which indicate that the ftrace_ops should be called
with a "jmp" instead of "call". For now, it is only used by the direct
call case.

When a direct ftrace_ops is marked with FTRACE_OPS_FL_JMP, the lowest
bit of ops->direct_call is set to 1. Therefore, we can tell if we
should use "jmp" for the callback in ftrace_call_replace().

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
 include/linux/ftrace.h | 33 +++++++++++++++++++++++++++++++++
 kernel/trace/Kconfig   | 12 ++++++++++++
 kernel/trace/ftrace.c  |  9 ++++++++-
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7ded7df6e9b5..14705dec1b08 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -351,6 +351,7 @@ enum {
 	FTRACE_OPS_FL_DIRECT			= BIT(17),
 	FTRACE_OPS_FL_SUBOP			= BIT(18),
 	FTRACE_OPS_FL_GRAPH			= BIT(19),
+	FTRACE_OPS_FL_JMP			= BIT(20),
 };
 
 #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -569,6 +570,38 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
 						 unsigned long addr) { }
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
+static inline bool ftrace_is_jmp(unsigned long addr)
+{
+	return addr & 1;
+}
+
+static inline unsigned long ftrace_jmp_set(unsigned long addr)
+{
+	return addr | 1UL;
+}
+
+static inline unsigned long ftrace_jmp_get(unsigned long addr)
+{
+	return addr & ~1UL;
+}
+#else
+static inline bool ftrace_is_jmp(unsigned long addr)
+{
+	return false;
+}
+
+static inline unsigned long ftrace_jmp_set(unsigned long addr)
+{
+	return addr;
+}
+
+static inline unsigned long ftrace_jmp_get(unsigned long addr)
+{
+	return addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_JMP */
+
 #ifdef CONFIG_STACK_TRACER
 
 int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d2c79da81e4f..4661b9e606e0 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -80,6 +80,12 @@ config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
 	  If the architecture generates __patchable_function_entries sections
 	  but does not want them included in the ftrace locations.
 
+config HAVE_DYNAMIC_FTRACE_WITH_JMP
+	bool
+	help
+	  If the architecture supports replacing the __fentry__ call with a
+	  "jmp" instruction.
+
 config HAVE_SYSCALL_TRACEPOINTS
 	bool
 	help
@@ -330,6 +336,12 @@ config DYNAMIC_FTRACE_WITH_ARGS
 	depends on DYNAMIC_FTRACE
 	depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS
 
+config DYNAMIC_FTRACE_WITH_JMP
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+	depends on HAVE_DYNAMIC_FTRACE_WITH_JMP
+
 config FPROBE
 	bool "Kernel Function Probe (fprobe)"
 	depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index efb5ce32298f..8d7b2a7f4b15 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5938,7 +5938,8 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
 	for (i = 0; i < size; i++) {
 		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
 			del = __ftrace_lookup_ip(direct_functions, entry->ip);
-			if (del && del->direct == addr) {
+			if (del && ftrace_jmp_get(del->direct) ==
+				   ftrace_jmp_get(addr)) {
 				remove_hash_entry(direct_functions, del);
 				kfree(del);
 			}
@@ -5994,6 +5995,9 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 
 	mutex_lock(&direct_mutex);
 
+	if (ops->flags & FTRACE_OPS_FL_JMP)
+		addr = ftrace_jmp_set(addr);
+
 	/* Make sure requested entries are not already registered.. */
 	size = 1 << hash->size_bits;
 	for (i = 0; i < size; i++) {
@@ -6117,6 +6121,9 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 
 	lockdep_assert_held_once(&direct_mutex);
 
+	if (ops->flags & FTRACE_OPS_FL_JMP)
+		addr = ftrace_jmp_set(addr);
+
 	/* Enable the tmp_ops to have the same functions as the direct ops */
 	ftrace_ops_init(&tmp_ops);
 	tmp_ops.func_hash = ops->func_hash;
-- 
2.51.2
Re: [PATCH RFC bpf-next 1/7] ftrace: introduce FTRACE_OPS_FL_JMP
Posted by bot+bpf-ci@kernel.org 2 months, 3 weeks ago
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 7ded7df6e..14705dec1 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -351,6 +351,7 @@ enum {
>  	FTRACE_OPS_FL_DIRECT			= BIT(17),
>  	FTRACE_OPS_FL_SUBOP			= BIT(18),
>  	FTRACE_OPS_FL_GRAPH			= BIT(19),
> +	FTRACE_OPS_FL_JMP			= BIT(20),
>  };
>
>  #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS

[ ... ]

> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
> +static inline bool ftrace_is_jmp(unsigned long addr)
> +{
> +	return addr & 1;
> +}
> +
> +static inline unsigned long ftrace_jmp_set(unsigned long addr)
> +{
> +	return addr | 1UL;
> +}
> +
> +static inline unsigned long ftrace_jmp_get(unsigned long addr)
> +{
> +	return addr & ~1UL;
> +}

[ ... ]

> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 42bd2ba68..b143a79e6 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -5938,7 +5938,8 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
>  	for (i = 0; i < size; i++) {
>  		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
>  			del = __ftrace_lookup_ip(direct_functions, entry->ip);
> -			if (del && del->direct == addr) {
> +			if (del && ftrace_jmp_get(del->direct) ==
> +				   ftrace_jmp_get(addr)) {
>  				remove_hash_entry(direct_functions, del);
>  				kfree(del);
>  			}

[ ... ]

> @@ -5994,6 +5995,9 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
>
>  	mutex_lock(&direct_mutex);
>
> +	if (ops->flags & FTRACE_OPS_FL_JMP)
> +		addr = ftrace_jmp_set(addr);
> +

When FTRACE_OPS_FL_JMP is set, this encodes the flag into the last bit
of addr. The modified addr is then stored in entry->direct and
ops->direct_call. However, this flagged address later flows through
ftrace_find_rec_direct()->ftrace_get_addr_new()->ftrace_make_call()->
ftrace_call_replace() without being masked.

Can ftrace_call_replace() generate correct CALL instructions when addr
has the LSB set? On x86, ftrace_call_replace() calls text_gen_insn()
with the raw address to generate a relative CALL instruction. If addr
has bit 0 set to 1, this would generate a CALL to an odd address,
causing execution to skip the first instruction byte at the trampoline.

Similarly, at runtime call_direct_funcs() reads ops->direct_call and
passes it to arch_ftrace_set_direct_caller() without masking. The x86
assembly then does RET to this address, which would jump to addr+1 if
the LSB is set.
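
A minimal sketch of the masking that appears to be missing there
(call_direct_funcs() is paraphrased from kernel/trace/ftrace.c; the
ftrace_jmp_get() call is the assumed fix, not code from this series):

	static void call_direct_funcs(unsigned long ip, unsigned long pip,
				      struct ftrace_ops *ops,
				      struct ftrace_regs *fregs)
	{
		unsigned long addr = READ_ONCE(ops->direct_call);

		if (!addr)
			return;

		/* strip the tag bit so RET lands on the real trampoline */
		arch_ftrace_set_direct_caller(fregs, ftrace_jmp_get(addr));
	}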

The commit message says "we can tell if we should use 'jmp' for the
callback in ftrace_call_replace()" but ftrace_call_replace() isn't
modified to check or mask the bit. Are there missing changes to handle
the encoded address in these paths?
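
For concreteness, one possible shape of the x86 side (a sketch only,
not taken from this series; the opcode selection and the masking are
assumptions about how the encoded bit could be consumed):

	static const char *ftrace_call_replace(unsigned long ip,
					       unsigned long addr)
	{
		u8 op = ftrace_is_jmp(addr) ? JMP32_INSN_OPCODE
					    : CALL_INSN_OPCODE;

		/* mask off the tag bit before generating the instruction */
		return text_gen_insn(op, (void *)ip,
				     (void *)ftrace_jmp_get(addr));
	}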

>  	/* Make sure requested entries are not already registered.. */
>  	size = 1 << hash->size_bits;
>  	for (i = 0; i < size; i++) {

[ ... ]

> @@ -6117,6 +6121,9 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
>
>  	lockdep_assert_held_once(&direct_mutex);
>
> +	if (ops->flags & FTRACE_OPS_FL_JMP)
> +		addr = ftrace_jmp_set(addr);
> +

Same issue here - the flagged addr is stored, but downstream consumers
do not mask it before using it as a jump target.


---
AI reviewed your patch. Please fix the bug or reply by email explaining why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/19360353328