[PATCHv5 bpf-next 9/9] bpf,x86: Use single ftrace_ops for direct calls

Jiri Olsa posted 9 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCHv5 bpf-next 9/9] bpf,x86: Use single ftrace_ops for direct calls
Posted by Jiri Olsa 1 month, 3 weeks ago
Use a single ftrace_ops object for direct call updates instead of
allocating a separate ftrace_ops object for each trampoline.

With a single ftrace_ops object we can use the update_ftrace_direct_*
api, which allows updates of multiple ip sites on a single ftrace_ops object.

Add the HAVE_SINGLE_FTRACE_DIRECT_OPS config option, to be enabled on
each arch that supports this.

At the moment we can enable this only on the x86 arch, because arm relies
on the ftrace_ops object representing just a single trampoline image
(stored in ftrace_ops::direct_call). Archs that do not support this will
continue to use the *_ftrace_direct api.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/Kconfig        |   1 +
 kernel/bpf/trampoline.c | 195 ++++++++++++++++++++++++++++++++++------
 kernel/trace/Kconfig    |   3 +
 kernel/trace/ftrace.c   |   7 +-
 4 files changed, 177 insertions(+), 29 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 17a107cc5244..d0c36e49e66e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -335,6 +335,7 @@ config X86
 	select SCHED_SMT			if SMP
 	select ARCH_SUPPORTS_SCHED_CLUSTER	if SMP
 	select ARCH_SUPPORTS_SCHED_MC		if SMP
+	select HAVE_SINGLE_FTRACE_DIRECT_OPS	if X86_64 && DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 17af2aad8382..02371db3db3e 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -33,12 +33,40 @@ static DEFINE_MUTEX(trampoline_mutex);
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
 
+#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
+static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
+{
+	struct hlist_head *head_ip;
+	struct bpf_trampoline *tr;
+
+	mutex_lock(&trampoline_mutex);
+	head_ip = &trampoline_ip_table[hash_64(ip, TRAMPOLINE_HASH_BITS)];
+	hlist_for_each_entry(tr, head_ip, hlist_ip) {
+		if (tr->ip == ip)
+			goto out;
+	}
+	tr = NULL;
+out:
+	mutex_unlock(&trampoline_mutex);
+	return tr;
+}
+#else
+static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
+{
+	return ops->private;
+}
+#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
+
 static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
 				     enum ftrace_ops_cmd cmd)
 {
-	struct bpf_trampoline *tr = ops->private;
+	struct bpf_trampoline *tr;
 	int ret = 0;
 
+	tr = direct_ops_ip_lookup(ops, ip);
+	if (!tr)
+		return -EINVAL;
+
 	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
 		/* This is called inside register_ftrace_direct_multi(), so
 		 * tr->mutex is already locked.
@@ -137,6 +165,139 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym)
 			   PAGE_SIZE, true, ksym->name);
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
+/*
+ * We have only single direct_ops which contains all the direct call
+ * sites and is the only global ftrace_ops for all trampolines.
+ *
+ * We use 'update_ftrace_direct_*' api for attachment.
+ */
+struct ftrace_ops direct_ops = {
+	.ops_func = bpf_tramp_ftrace_ops_func,
+};
+
+static int direct_ops_alloc(struct bpf_trampoline *tr)
+{
+	tr->fops = &direct_ops;
+	return 0;
+}
+
+static void direct_ops_free(struct bpf_trampoline *tr) { }
+
+static struct ftrace_hash *hash_from_ip(struct bpf_trampoline *tr, void *ptr)
+{
+	unsigned long ip, addr = (unsigned long) ptr;
+	struct ftrace_hash *hash;
+
+	ip = ftrace_location(tr->ip);
+	if (!ip)
+		return NULL;
+	hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+	if (!hash)
+		return NULL;
+	if (bpf_trampoline_use_jmp(tr->flags))
+		addr = ftrace_jmp_set(addr);
+	if (!add_hash_entry_direct(hash, ip, addr)) {
+		free_ftrace_hash(hash);
+		return NULL;
+	}
+	return hash;
+}
+
+static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
+{
+	struct ftrace_hash *hash = hash_from_ip(tr, addr);
+	int err = -ENOMEM;
+
+	if (hash)
+		err = update_ftrace_direct_add(tr->fops, hash);
+	free_ftrace_hash(hash);
+	return err;
+}
+
+static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
+{
+	struct ftrace_hash *hash = hash_from_ip(tr, addr);
+	int err = -ENOMEM;
+
+	if (hash)
+		err = update_ftrace_direct_del(tr->fops, hash);
+	free_ftrace_hash(hash);
+	return err;
+}
+
+static int direct_ops_mod(struct bpf_trampoline *tr, void *addr, bool lock_direct_mutex)
+{
+	struct ftrace_hash *hash = hash_from_ip(tr, addr);
+	int err = -ENOMEM;
+
+	if (hash)
+		err = update_ftrace_direct_mod(tr->fops, hash, lock_direct_mutex);
+	free_ftrace_hash(hash);
+	return err;
+}
+#else
+/*
+ * We allocate ftrace_ops object for each trampoline and it contains
+ * call site specific for that trampoline.
+ *
+ * We use *_ftrace_direct api for attachment.
+ */
+static int direct_ops_alloc(struct bpf_trampoline *tr)
+{
+	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
+	if (!tr->fops)
+		return -ENOMEM;
+	tr->fops->private = tr;
+	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
+	return 0;
+}
+
+static void direct_ops_free(struct bpf_trampoline *tr)
+{
+	if (tr->fops) {
+		ftrace_free_filter(tr->fops);
+		kfree(tr->fops);
+	}
+}
+
+static int direct_ops_add(struct bpf_trampoline *tr, void *ptr)
+{
+	unsigned long addr = (unsigned long) ptr;
+	struct ftrace_ops *ops = tr->fops;
+	int ret;
+
+	if (bpf_trampoline_use_jmp(tr->flags))
+		addr = ftrace_jmp_set(addr);
+
+	ret = ftrace_set_filter_ip(ops, tr->ip, 0, 1);
+	if (ret)
+		return ret;
+	return register_ftrace_direct(ops, addr);
+}
+
+static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
+{
+	return unregister_ftrace_direct(tr->fops, (long)addr, false);
+}
+
+static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
+{
+	unsigned long addr = (unsigned long) ptr;
+	struct ftrace_ops *ops = tr->fops;
+
+	if (bpf_trampoline_use_jmp(tr->flags))
+		addr = ftrace_jmp_set(addr);
+	if (lock_direct_mutex)
+		return modify_ftrace_direct(ops, addr);
+	return modify_ftrace_direct_nolock(ops, addr);
+}
+#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
+#else
+static void direct_ops_free(struct bpf_trampoline *tr) { }
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
 static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
 {
 	struct bpf_trampoline *tr;
@@ -155,14 +316,11 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
 	if (!tr)
 		goto out;
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
-	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
-	if (!tr->fops) {
+	if (direct_ops_alloc(tr)) {
 		kfree(tr);
 		tr = NULL;
 		goto out;
 	}
-	tr->fops->private = tr;
-	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
 #endif
 
 	tr->key = key;
@@ -206,7 +364,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
 	int ret;
 
 	if (tr->func.ftrace_managed)
-		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
+		ret = direct_ops_del(tr, old_addr);
 	else
 		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);
 
@@ -220,15 +378,7 @@ static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
 	int ret;
 
 	if (tr->func.ftrace_managed) {
-		unsigned long addr = (unsigned long) new_addr;
-
-		if (bpf_trampoline_use_jmp(tr->flags))
-			addr = ftrace_jmp_set(addr);
-
-		if (lock_direct_mutex)
-			ret = modify_ftrace_direct(tr->fops, addr);
-		else
-			ret = modify_ftrace_direct_nolock(tr->fops, addr);
+		ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
 	} else {
 		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
 						   new_addr);
@@ -251,15 +401,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 	}
 
 	if (tr->func.ftrace_managed) {
-		unsigned long addr = (unsigned long) new_addr;
-
-		if (bpf_trampoline_use_jmp(tr->flags))
-			addr = ftrace_jmp_set(addr);
-
-		ret = ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
-		if (ret)
-			return ret;
-		ret = register_ftrace_direct(tr->fops, addr);
+		ret = direct_ops_add(tr, new_addr);
 	} else {
 		ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
 	}
@@ -910,10 +1052,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 	 */
 	hlist_del(&tr->hlist_key);
 	hlist_del(&tr->hlist_ip);
-	if (tr->fops) {
-		ftrace_free_filter(tr->fops);
-		kfree(tr->fops);
-	}
+	direct_ops_free(tr);
 	kfree(tr);
 out:
 	mutex_unlock(&trampoline_mutex);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4661b9e606e0..1ad2e307c834 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -50,6 +50,9 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
 config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 	bool
 
+config HAVE_SINGLE_FTRACE_DIRECT_OPS
+	bool
+
 config HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
 	bool
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c2054fe80de7..a0789727b971 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2605,8 +2605,13 @@ unsigned long ftrace_find_rec_direct(unsigned long ip)
 static void call_direct_funcs(unsigned long ip, unsigned long pip,
 			      struct ftrace_ops *ops, struct ftrace_regs *fregs)
 {
-	unsigned long addr = READ_ONCE(ops->direct_call);
+	unsigned long addr;
 
+#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
+	addr = ftrace_find_rec_direct(ip);
+#else
+	addr = READ_ONCE(ops->direct_call);
+#endif
 	if (!addr)
 		return;
 
-- 
2.52.0
Re: [PATCHv5 bpf-next 9/9] bpf,x86: Use single ftrace_ops for direct calls
Posted by kernel test robot 1 month, 2 weeks ago
Hi Jiri,

kernel test robot noticed the following build errors:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Jiri-Olsa/ftrace-bpf-Remove-FTRACE_OPS_FL_JMP-ftrace_ops-flag/20251216-052916
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20251215211402.353056-10-jolsa%40kernel.org
patch subject: [PATCHv5 bpf-next 9/9] bpf,x86: Use single ftrace_ops for direct calls
config: riscv-allmodconfig (https://download.01.org/0day-ci/archive/20251221/202512210241.4wuAmCHu-lkp@intel.com/config)
compiler: clang version 22.0.0git (https://github.com/llvm/llvm-project b324c9f4fa112d61a553bf489b5f4f7ceea05ea8)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251221/202512210241.4wuAmCHu-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512210241.4wuAmCHu-lkp@intel.com/

All errors (new ones prefixed by >>):

>> kernel/bpf/trampoline.c:367:9: error: call to undeclared function 'direct_ops_del'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     367 |                 ret = direct_ops_del(tr, old_addr);
         |                       ^
   kernel/bpf/trampoline.c:367:9: note: did you mean 'direct_ops_free'?
   kernel/bpf/trampoline.c:298:13: note: 'direct_ops_free' declared here
     298 | static void direct_ops_free(struct bpf_trampoline *tr) { }
         |             ^
>> kernel/bpf/trampoline.c:381:9: error: call to undeclared function 'direct_ops_mod'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     381 |                 ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
         |                       ^
   kernel/bpf/trampoline.c:381:9: note: did you mean 'direct_ops_free'?
   kernel/bpf/trampoline.c:298:13: note: 'direct_ops_free' declared here
     298 | static void direct_ops_free(struct bpf_trampoline *tr) { }
         |             ^
>> kernel/bpf/trampoline.c:404:9: error: call to undeclared function 'direct_ops_add'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     404 |                 ret = direct_ops_add(tr, new_addr);
         |                       ^
   kernel/bpf/trampoline.c:404:9: note: did you mean 'direct_ops_free'?
   kernel/bpf/trampoline.c:298:13: note: 'direct_ops_free' declared here
     298 | static void direct_ops_free(struct bpf_trampoline *tr) { }
         |             ^
   3 errors generated.


vim +/direct_ops_del +367 kernel/bpf/trampoline.c

   360	
   361	static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
   362				     void *old_addr)
   363	{
   364		int ret;
   365	
   366		if (tr->func.ftrace_managed)
 > 367			ret = direct_ops_del(tr, old_addr);
   368		else
   369			ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);
   370	
   371		return ret;
   372	}
   373	
   374	static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
   375				 void *old_addr, void *new_addr,
   376				 bool lock_direct_mutex)
   377	{
   378		int ret;
   379	
   380		if (tr->func.ftrace_managed) {
 > 381			ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
   382		} else {
   383			ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
   384							   new_addr);
   385		}
   386		return ret;
   387	}
   388	
   389	/* first time registering */
   390	static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
   391	{
   392		void *ip = tr->func.addr;
   393		unsigned long faddr;
   394		int ret;
   395	
   396		faddr = ftrace_location((unsigned long)ip);
   397		if (faddr) {
   398			if (!tr->fops)
   399				return -ENOTSUPP;
   400			tr->func.ftrace_managed = true;
   401		}
   402	
   403		if (tr->func.ftrace_managed) {
 > 404			ret = direct_ops_add(tr, new_addr);
   405		} else {
   406			ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
   407		}
   408	
   409		return ret;
   410	}
   411	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCHv5 bpf-next 9/9] bpf,x86: Use single ftrace_ops for direct calls
Posted by kernel test robot 1 month, 2 weeks ago
Hi Jiri,

kernel test robot noticed the following build errors:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Jiri-Olsa/ftrace-bpf-Remove-FTRACE_OPS_FL_JMP-ftrace_ops-flag/20251216-052916
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20251215211402.353056-10-jolsa%40kernel.org
patch subject: [PATCHv5 bpf-next 9/9] bpf,x86: Use single ftrace_ops for direct calls
config: arc-allyesconfig (https://download.01.org/0day-ci/archive/20251221/202512211826.gtdm52TX-lkp@intel.com/config)
compiler: arc-linux-gcc (GCC) 15.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251221/202512211826.gtdm52TX-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512211826.gtdm52TX-lkp@intel.com/

All errors (new ones prefixed by >>):

   kernel/bpf/trampoline.c: In function 'unregister_fentry':
>> kernel/bpf/trampoline.c:367:23: error: implicit declaration of function 'direct_ops_del'; did you mean 'direct_ops_free'? [-Wimplicit-function-declaration]
     367 |                 ret = direct_ops_del(tr, old_addr);
         |                       ^~~~~~~~~~~~~~
         |                       direct_ops_free
   kernel/bpf/trampoline.c: In function 'modify_fentry':
>> kernel/bpf/trampoline.c:381:23: error: implicit declaration of function 'direct_ops_mod'; did you mean 'direct_ops_free'? [-Wimplicit-function-declaration]
     381 |                 ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
         |                       ^~~~~~~~~~~~~~
         |                       direct_ops_free
   kernel/bpf/trampoline.c: In function 'register_fentry':
>> kernel/bpf/trampoline.c:404:23: error: implicit declaration of function 'direct_ops_add'; did you mean 'direct_ops_free'? [-Wimplicit-function-declaration]
     404 |                 ret = direct_ops_add(tr, new_addr);
         |                       ^~~~~~~~~~~~~~
         |                       direct_ops_free


vim +367 kernel/bpf/trampoline.c

   360	
   361	static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
   362				     void *old_addr)
   363	{
   364		int ret;
   365	
   366		if (tr->func.ftrace_managed)
 > 367			ret = direct_ops_del(tr, old_addr);
   368		else
   369			ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);
   370	
   371		return ret;
   372	}
   373	
   374	static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
   375				 void *old_addr, void *new_addr,
   376				 bool lock_direct_mutex)
   377	{
   378		int ret;
   379	
   380		if (tr->func.ftrace_managed) {
 > 381			ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
   382		} else {
   383			ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
   384							   new_addr);
   385		}
   386		return ret;
   387	}
   388	
   389	/* first time registering */
   390	static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
   391	{
   392		void *ip = tr->func.addr;
   393		unsigned long faddr;
   394		int ret;
   395	
   396		faddr = ftrace_location((unsigned long)ip);
   397		if (faddr) {
   398			if (!tr->fops)
   399				return -ENOTSUPP;
   400			tr->func.ftrace_managed = true;
   401		}
   402	
   403		if (tr->func.ftrace_managed) {
 > 404			ret = direct_ops_add(tr, new_addr);
   405		} else {
   406			ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
   407		}
   408	
   409		return ret;
   410	}
   411	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCHv5 bpf-next 9/9] bpf,x86: Use single ftrace_ops for direct calls
Posted by Steven Rostedt 1 month, 3 weeks ago
On Mon, 15 Dec 2025 22:14:02 +0100
Jiri Olsa <jolsa@kernel.org> wrote:

> Using single ftrace_ops for direct calls update instead of allocating
> ftrace_ops object for each trampoline.
> 
> With single ftrace_ops object we can use update_ftrace_direct_* api
> that allows multiple ip sites updates on single ftrace_ops object.
> 
> Adding HAVE_SINGLE_FTRACE_DIRECT_OPS config option to be enabled on
> each arch that supports this.
> 
> At the moment we can enable this only on x86 arch, because arm relies
> on ftrace_ops object representing just single trampoline image (stored
> in ftrace_ops::direct_call). Ach that do not support this will continue

My back "Ach" and doesn't support me well. ;-)

> to use *_ftrace_direct api.
> 
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  arch/x86/Kconfig        |   1 +
>  kernel/bpf/trampoline.c | 195 ++++++++++++++++++++++++++++++++++------
>  kernel/trace/Kconfig    |   3 +
>  kernel/trace/ftrace.c   |   7 +-
>  4 files changed, 177 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 17a107cc5244..d0c36e49e66e 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -335,6 +335,7 @@ config X86
>  	select SCHED_SMT			if SMP
>  	select ARCH_SUPPORTS_SCHED_CLUSTER	if SMP
>  	select ARCH_SUPPORTS_SCHED_MC		if SMP
> +	select HAVE_SINGLE_FTRACE_DIRECT_OPS	if X86_64 && DYNAMIC_FTRACE_WITH_DIRECT_CALLS

You can remove the "&& DYNAMIC_FTRACE_WITH_DIRECT_CALLS" part by having the
config depend on it (see below).

>  
>  config INSTRUCTION_DECODER
>  	def_bool y
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 17af2aad8382..02371db3db3e 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -33,12 +33,40 @@ static DEFINE_MUTEX(trampoline_mutex);
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
>  static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
>  
> +#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS

Make this:

 #ifdef CONFIG_SINGLE_FTRACE_DIRECT_OPS

for the suggested modification in the Kconfig below.

> +static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
> +{
> +	struct hlist_head *head_ip;
> +	struct bpf_trampoline *tr;
> +
> +	mutex_lock(&trampoline_mutex);

	guard(mutex)(&trampoline_mutex);

> +	head_ip = &trampoline_ip_table[hash_64(ip, TRAMPOLINE_HASH_BITS)];
> +	hlist_for_each_entry(tr, head_ip, hlist_ip) {
> +		if (tr->ip == ip)

			return NULL;

> +			goto out;
> +	}


> +	tr = NULL;
> +out:
> +	mutex_unlock(&trampoline_mutex);

No need for the above

> +	return tr;
> +}
> +#else
> +static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
> +{
> +	return ops->private;
> +}
> +#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
> +
>  static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
>  				     enum ftrace_ops_cmd cmd)
>  {
> -	struct bpf_trampoline *tr = ops->private;
> +	struct bpf_trampoline *tr;
>  	int ret = 0;
>  
> +	tr = direct_ops_ip_lookup(ops, ip);
> +	if (!tr)
> +		return -EINVAL;
> +
>  	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
>  		/* This is called inside register_ftrace_direct_multi(), so
>  		 * tr->mutex is already locked.
> @@ -137,6 +165,139 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym)
>  			   PAGE_SIZE, true, ksym->name);
>  }
>  
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> +#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS

Replace the above two with:

 #ifdef CONFIG_SINGLE_FTRACE_DIRECT_OPS

> +/*
> + * We have only single direct_ops which contains all the direct call
> + * sites and is the only global ftrace_ops for all trampolines.
> + *
> + * We use 'update_ftrace_direct_*' api for attachment.
> + */
> +struct ftrace_ops direct_ops = {
> +	.ops_func = bpf_tramp_ftrace_ops_func,
> +};
> +
> +static int direct_ops_alloc(struct bpf_trampoline *tr)
> +{
> +	tr->fops = &direct_ops;
> +	return 0;
> +}
> +
> +static void direct_ops_free(struct bpf_trampoline *tr) { }
> +
> +static struct ftrace_hash *hash_from_ip(struct bpf_trampoline *tr, void *ptr)
> +{
> +	unsigned long ip, addr = (unsigned long) ptr;
> +	struct ftrace_hash *hash;
> +
> +	ip = ftrace_location(tr->ip);
> +	if (!ip)
> +		return NULL;
> +	hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
> +	if (!hash)
> +		return NULL;
> +	if (bpf_trampoline_use_jmp(tr->flags))
> +		addr = ftrace_jmp_set(addr);
> +	if (!add_hash_entry_direct(hash, ip, addr)) {
> +		free_ftrace_hash(hash);
> +		return NULL;
> +	}
> +	return hash;
> +}
> +
> +static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
> +{
> +	struct ftrace_hash *hash = hash_from_ip(tr, addr);
> +	int err = -ENOMEM;
> +
> +	if (hash)
> +		err = update_ftrace_direct_add(tr->fops, hash);
> +	free_ftrace_hash(hash);
> +	return err;
> +}

I think these functions would be cleaner as:

{
	struct ftrace_hash *hash = hash_from_ip(tr, addr);
	int err;

	if (!hash)
		return -ENOMEM;

	err = update_ftrace_direct_*(tr->fops, hash);
	free_ftrace_hash(hash);
	return err;
}


> +
> +static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
> +{
> +	struct ftrace_hash *hash = hash_from_ip(tr, addr);
> +	int err = -ENOMEM;
> +
> +	if (hash)
> +		err = update_ftrace_direct_del(tr->fops, hash);
> +	free_ftrace_hash(hash);
> +	return err;
> +}
> +
> +static int direct_ops_mod(struct bpf_trampoline *tr, void *addr, bool lock_direct_mutex)
> +{
> +	struct ftrace_hash *hash = hash_from_ip(tr, addr);
> +	int err = -ENOMEM;
> +
> +	if (hash)
> +		err = update_ftrace_direct_mod(tr->fops, hash, lock_direct_mutex);
> +	free_ftrace_hash(hash);
> +	return err;
> +}
> +#else
> +/*
> + * We allocate ftrace_ops object for each trampoline and it contains
> + * call site specific for that trampoline.
> + *
> + * We use *_ftrace_direct api for attachment.
> + */
> +static int direct_ops_alloc(struct bpf_trampoline *tr)
> +{
> +	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
> +	if (!tr->fops)
> +		return -ENOMEM;
> +	tr->fops->private = tr;
> +	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
> +	return 0;
> +}
> +
> +static void direct_ops_free(struct bpf_trampoline *tr)
> +{
> +	if (tr->fops) {
> +		ftrace_free_filter(tr->fops);
> +		kfree(tr->fops);
> +	}
> +}

Why not:

static void direct_ops_free(struct bpf_trampoline *tr)
{
	if (!tr->fops)
		return;

	ftrace_free_filter(tr->fops);
	kfree(tr->fops);
}

 ?

> +
> +static int direct_ops_add(struct bpf_trampoline *tr, void *ptr)
> +{
> +	unsigned long addr = (unsigned long) ptr;
> +	struct ftrace_ops *ops = tr->fops;
> +	int ret;
> +
> +	if (bpf_trampoline_use_jmp(tr->flags))
> +		addr = ftrace_jmp_set(addr);
> +
> +	ret = ftrace_set_filter_ip(ops, tr->ip, 0, 1);
> +	if (ret)
> +		return ret;
> +	return register_ftrace_direct(ops, addr);
> +}
> +
> +static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
> +{
> +	return unregister_ftrace_direct(tr->fops, (long)addr, false);
> +}
> +
> +static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
> +{
> +	unsigned long addr = (unsigned long) ptr;
> +	struct ftrace_ops *ops = tr->fops;
> +
> +	if (bpf_trampoline_use_jmp(tr->flags))
> +		addr = ftrace_jmp_set(addr);
> +	if (lock_direct_mutex)
> +		return modify_ftrace_direct(ops, addr);
> +	return modify_ftrace_direct_nolock(ops, addr);
> +}
> +#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
> +#else
> +static void direct_ops_free(struct bpf_trampoline *tr) { }

This is somewhat inconsistent with direct_ops_alloc() that has:

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	if (direct_ops_alloc(tr)) {
		kfree(tr);
		tr = NULL;
		goto out;
	}
#endif

Now, if you wrap the direct_ops_free() too, we can remove the
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
part with my kconfig suggestion. Otherwise keep the kconfig as is, but I
would add a stub function for direct_ops_alloc() too.

> +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
> +
>  static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
>  {
>  	struct bpf_trampoline *tr;
> @@ -155,14 +316,11 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
>  	if (!tr)
>  		goto out;
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> -	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
> -	if (!tr->fops) {
> +	if (direct_ops_alloc(tr)) {
>  		kfree(tr);
>  		tr = NULL;
>  		goto out;
>  	}
> -	tr->fops->private = tr;
> -	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
>  #endif
>  
>  	tr->key = key;
> @@ -206,7 +364,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
>  	int ret;
>  
>  	if (tr->func.ftrace_managed)
> -		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
> +		ret = direct_ops_del(tr, old_addr);

Doesn't this need a wrapper too?

>  	else
>  		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);
>  
> @@ -220,15 +378,7 @@ static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
>  	int ret;
>  
>  	if (tr->func.ftrace_managed) {
> -		unsigned long addr = (unsigned long) new_addr;
> -
> -		if (bpf_trampoline_use_jmp(tr->flags))
> -			addr = ftrace_jmp_set(addr);
> -
> -		if (lock_direct_mutex)
> -			ret = modify_ftrace_direct(tr->fops, addr);
> -		else
> -			ret = modify_ftrace_direct_nolock(tr->fops, addr);
> +		ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);

and this.

>  	} else {
>  		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
>  						   new_addr);
> @@ -251,15 +401,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
>  	}
>  
>  	if (tr->func.ftrace_managed) {
> -		unsigned long addr = (unsigned long) new_addr;
> -
> -		if (bpf_trampoline_use_jmp(tr->flags))
> -			addr = ftrace_jmp_set(addr);
> -
> -		ret = ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
> -		if (ret)
> -			return ret;
> -		ret = register_ftrace_direct(tr->fops, addr);
> +		ret = direct_ops_add(tr, new_addr);

Ditto.

>  	} else {
>  		ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
>  	}
> @@ -910,10 +1052,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
>  	 */
>  	hlist_del(&tr->hlist_key);
>  	hlist_del(&tr->hlist_ip);
> -	if (tr->fops) {
> -		ftrace_free_filter(tr->fops);
> -		kfree(tr->fops);
> -	}
> +	direct_ops_free(tr);
>  	kfree(tr);
>  out:
>  	mutex_unlock(&trampoline_mutex);
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 4661b9e606e0..1ad2e307c834 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -50,6 +50,9 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
>  config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
>  	bool
>  
> +config HAVE_SINGLE_FTRACE_DIRECT_OPS
> +	bool
> +

Now you could add:

  config SINGLE_FTRACE_DIRECT_OPS
	bool
	default y
	depends on HAVE_SINGLE_FTRACE_DIRECT_OPS && DYNAMIC_FTRACE_WITH_DIRECT_CALLS

-- Steve


>  config HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
>  	bool 
>  
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index c2054fe80de7..a0789727b971 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -2605,8 +2605,13 @@ unsigned long ftrace_find_rec_direct(unsigned long ip)
>  static void call_direct_funcs(unsigned long ip, unsigned long pip,
>  			      struct ftrace_ops *ops, struct ftrace_regs *fregs)
>  {
> -	unsigned long addr = READ_ONCE(ops->direct_call);
> +	unsigned long addr;
>  
> +#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
> +	addr = ftrace_find_rec_direct(ip);
> +#else
> +	addr = READ_ONCE(ops->direct_call);
> +#endif
>  	if (!addr)
>  		return;
>
Re: [PATCHv5 bpf-next 9/9] bpf,x86: Use single ftrace_ops for direct calls
Posted by Jiri Olsa 1 month, 3 weeks ago
On Thu, Dec 18, 2025 at 11:26:08AM -0500, Steven Rostedt wrote:
> On Mon, 15 Dec 2025 22:14:02 +0100
> Jiri Olsa <jolsa@kernel.org> wrote:
> 
> > Using single ftrace_ops for direct calls update instead of allocating
> > ftrace_ops object for each trampoline.
> > 
> > With single ftrace_ops object we can use update_ftrace_direct_* api
> > that allows multiple ip sites updates on single ftrace_ops object.
> > 
> > Adding HAVE_SINGLE_FTRACE_DIRECT_OPS config option to be enabled on
> > each arch that supports this.
> > 
> > At the moment we can enable this only on x86 arch, because arm relies
> > on ftrace_ops object representing just single trampoline image (stored
> > in ftrace_ops::direct_call). Ach that do not support this will continue
> 
> My back "Ach" and doesn't support me well. ;-)

heh, should have been 'Archs' ;-)

> 
> > to use *_ftrace_direct api.
> > 
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  arch/x86/Kconfig        |   1 +
> >  kernel/bpf/trampoline.c | 195 ++++++++++++++++++++++++++++++++++------
> >  kernel/trace/Kconfig    |   3 +
> >  kernel/trace/ftrace.c   |   7 +-
> >  4 files changed, 177 insertions(+), 29 deletions(-)
> > 
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index 17a107cc5244..d0c36e49e66e 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -335,6 +335,7 @@ config X86
> >  	select SCHED_SMT			if SMP
> >  	select ARCH_SUPPORTS_SCHED_CLUSTER	if SMP
> >  	select ARCH_SUPPORTS_SCHED_MC		if SMP
> > +	select HAVE_SINGLE_FTRACE_DIRECT_OPS	if X86_64 && DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> 
> You can remove the "&& DYNAMIC_FTRACE_WITH_DIRECT_CALLS" part by having the
> config depend on it (see below).

...

> 
> >  
> >  config INSTRUCTION_DECODER
> >  	def_bool y
> > diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> > index 17af2aad8382..02371db3db3e 100644
> > --- a/kernel/bpf/trampoline.c
> > +++ b/kernel/bpf/trampoline.c
> > @@ -33,12 +33,40 @@ static DEFINE_MUTEX(trampoline_mutex);
> >  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> >  static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
> >  
> > +#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
> 
> Make this:
> 
>  #ifdef CONFIG_SINGLE_FTRACE_DIRECT_OPS
> 
> for the suggested modification in the Kconfig below.
> 
> > +static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
> > +{
> > +	struct hlist_head *head_ip;
> > +	struct bpf_trampoline *tr;
> > +
> > +	mutex_lock(&trampoline_mutex);
> 
> 	guard(mutex)(&trampoline_mutex);

right, will change

> 
> > +	head_ip = &trampoline_ip_table[hash_64(ip, TRAMPOLINE_HASH_BITS)];
> > +	hlist_for_each_entry(tr, head_ip, hlist_ip) {
> > +		if (tr->ip == ip)
> 
> 			return NULL;
> 
> > +			goto out;
> > +	}
> 
> 
> > +	tr = NULL;
> > +out:
> > +	mutex_unlock(&trampoline_mutex);
> 
> No need for the above

yep

> 
> > +	return tr;
> > +}
> > +#else
> > +static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
> > +{
> > +	return ops->private;
> > +}
> > +#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
> > +
> >  static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
> >  				     enum ftrace_ops_cmd cmd)
> >  {
> > -	struct bpf_trampoline *tr = ops->private;
> > +	struct bpf_trampoline *tr;
> >  	int ret = 0;
> >  
> > +	tr = direct_ops_ip_lookup(ops, ip);
> > +	if (!tr)
> > +		return -EINVAL;
> > +
> >  	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
> >  		/* This is called inside register_ftrace_direct_multi(), so
> >  		 * tr->mutex is already locked.
> > @@ -137,6 +165,139 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym)
> >  			   PAGE_SIZE, true, ksym->name);
> >  }
> >  
> > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> > +#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
> 
> Replace the above two with:
> 
>  #ifdef CONFIG_SINGLE_FTRACE_DIRECT_OPS

...

> 
> > +/*
> > + * We have only single direct_ops which contains all the direct call
> > + * sites and is the only global ftrace_ops for all trampolines.
> > + *
> > + * We use 'update_ftrace_direct_*' api for attachment.
> > + */
> > +struct ftrace_ops direct_ops = {
> > +	.ops_func = bpf_tramp_ftrace_ops_func,
> > +};
> > +
> > +static int direct_ops_alloc(struct bpf_trampoline *tr)
> > +{
> > +	tr->fops = &direct_ops;
> > +	return 0;
> > +}
> > +
> > +static void direct_ops_free(struct bpf_trampoline *tr) { }
> > +
> > +static struct ftrace_hash *hash_from_ip(struct bpf_trampoline *tr, void *ptr)
> > +{
> > +	unsigned long ip, addr = (unsigned long) ptr;
> > +	struct ftrace_hash *hash;
> > +
> > +	ip = ftrace_location(tr->ip);
> > +	if (!ip)
> > +		return NULL;
> > +	hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
> > +	if (!hash)
> > +		return NULL;
> > +	if (bpf_trampoline_use_jmp(tr->flags))
> > +		addr = ftrace_jmp_set(addr);
> > +	if (!add_hash_entry_direct(hash, ip, addr)) {
> > +		free_ftrace_hash(hash);
> > +		return NULL;
> > +	}
> > +	return hash;
> > +}
> > +
> > +static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
> > +{
> > +	struct ftrace_hash *hash = hash_from_ip(tr, addr);
> > +	int err = -ENOMEM;
> > +
> > +	if (hash)
> > +		err = update_ftrace_direct_add(tr->fops, hash);
> > +	free_ftrace_hash(hash);
> > +	return err;
> > +}
> 
> I think these functions would be cleaner as:
> 
> {
> 	struct ftrace_hash *hash = hash_from_ip(tr, addr);
> 	int err;
> 
> 	if (!hash)
> 		return -ENOMEM;
> 
> 	err = update_ftrace_direct_*(tr->fops, hash);
> 	free_ftrace_hash(hash);
> 	return err;
> }

np, will change

> 
> 
> > +
> > +static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
> > +{
> > +	struct ftrace_hash *hash = hash_from_ip(tr, addr);
> > +	int err = -ENOMEM;
> > +
> > +	if (hash)
> > +		err = update_ftrace_direct_del(tr->fops, hash);
> > +	free_ftrace_hash(hash);
> > +	return err;
> > +}
> > +
> > +static int direct_ops_mod(struct bpf_trampoline *tr, void *addr, bool lock_direct_mutex)
> > +{
> > +	struct ftrace_hash *hash = hash_from_ip(tr, addr);
> > +	int err = -ENOMEM;
> > +
> > +	if (hash)
> > +		err = update_ftrace_direct_mod(tr->fops, hash, lock_direct_mutex);
> > +	free_ftrace_hash(hash);
> > +	return err;
> > +}
> > +#else
> > +/*
> > + * We allocate ftrace_ops object for each trampoline and it contains
> > + * call site specific for that trampoline.
> > + *
> > + * We use *_ftrace_direct api for attachment.
> > + */
> > +static int direct_ops_alloc(struct bpf_trampoline *tr)
> > +{
> > +	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
> > +	if (!tr->fops)
> > +		return -ENOMEM;
> > +	tr->fops->private = tr;
> > +	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
> > +	return 0;
> > +}
> > +
> > +static void direct_ops_free(struct bpf_trampoline *tr)
> > +{
> > +	if (tr->fops) {
> > +		ftrace_free_filter(tr->fops);
> > +		kfree(tr->fops);
> > +	}
> > +}
> 
> Why not:
> 
> static void direct_ops_free(struct bpf_trampoline *tr)
> {
> 	if (!tr->fops)
> 		return;
> 
> 	ftrace_free_filter(tr->fops);
> 	kfree(tr->fops);
> }

same pattern like above, ok

> 
>  ?
> 
> > +
> > +static int direct_ops_add(struct bpf_trampoline *tr, void *ptr)
> > +{
> > +	unsigned long addr = (unsigned long) ptr;
> > +	struct ftrace_ops *ops = tr->fops;
> > +	int ret;
> > +
> > +	if (bpf_trampoline_use_jmp(tr->flags))
> > +		addr = ftrace_jmp_set(addr);
> > +
> > +	ret = ftrace_set_filter_ip(ops, tr->ip, 0, 1);
> > +	if (ret)
> > +		return ret;
> > +	return register_ftrace_direct(ops, addr);
> > +}
> > +
> > +static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
> > +{
> > +	return unregister_ftrace_direct(tr->fops, (long)addr, false);
> > +}
> > +
> > +static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
> > +{
> > +	unsigned long addr = (unsigned long) ptr;
> > +	struct ftrace_ops *ops = tr->fops;
> > +
> > +	if (bpf_trampoline_use_jmp(tr->flags))
> > +		addr = ftrace_jmp_set(addr);
> > +	if (lock_direct_mutex)
> > +		return modify_ftrace_direct(ops, addr);
> > +	return modify_ftrace_direct_nolock(ops, addr);
> > +}
> > +#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
> > +#else
> > +static void direct_ops_free(struct bpf_trampoline *tr) { }
> 
> This is somewhat inconsistent with direct_ops_alloc() that has:
> 
> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> 	if (direct_ops_alloc(tr)) {
> 		kfree(tr);
> 		tr = NULL;
> 		goto out;
> 	}
> #endif
> 
> Now, if you wrap the direct_ops_free() too, we can remove the
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> part with my kconfig suggestion. Otherwise keep the kconfig as is, but I
> would add a stub function for direct_ops_alloc() too.

ah right.. I think let's do the kconfig change you suggest to make
this simpler

> 
> > +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
> > +
> >  static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
> >  {
> >  	struct bpf_trampoline *tr;
> > @@ -155,14 +316,11 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
> >  	if (!tr)
> >  		goto out;
> >  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> > -	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
> > -	if (!tr->fops) {
> > +	if (direct_ops_alloc(tr)) {
> >  		kfree(tr);
> >  		tr = NULL;
> >  		goto out;
> >  	}
> > -	tr->fops->private = tr;
> > -	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
> >  #endif
> >  
> >  	tr->key = key;
> > @@ -206,7 +364,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
> >  	int ret;
> >  
> >  	if (tr->func.ftrace_managed)
> > -		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
> > +		ret = direct_ops_del(tr, old_addr);
> 
> Doesn't this need a wrapper too?

yep

> 
> >  	else
> >  		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);
> >  
> > @@ -220,15 +378,7 @@ static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
> >  	int ret;
> >  
> >  	if (tr->func.ftrace_managed) {
> > -		unsigned long addr = (unsigned long) new_addr;
> > -
> > -		if (bpf_trampoline_use_jmp(tr->flags))
> > -			addr = ftrace_jmp_set(addr);
> > -
> > -		if (lock_direct_mutex)
> > -			ret = modify_ftrace_direct(tr->fops, addr);
> > -		else
> > -			ret = modify_ftrace_direct_nolock(tr->fops, addr);
> > +		ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
> 
> and this.
> 
> >  	} else {
> >  		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
> >  						   new_addr);
> > @@ -251,15 +401,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
> >  	}
> >  
> >  	if (tr->func.ftrace_managed) {
> > -		unsigned long addr = (unsigned long) new_addr;
> > -
> > -		if (bpf_trampoline_use_jmp(tr->flags))
> > -			addr = ftrace_jmp_set(addr);
> > -
> > -		ret = ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
> > -		if (ret)
> > -			return ret;
> > -		ret = register_ftrace_direct(tr->fops, addr);
> > +		ret = direct_ops_add(tr, new_addr);
> 
> Ditto.

yes

> 
> >  	} else {
> >  		ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
> >  	}
> > @@ -910,10 +1052,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
> >  	 */
> >  	hlist_del(&tr->hlist_key);
> >  	hlist_del(&tr->hlist_ip);
> > -	if (tr->fops) {
> > -		ftrace_free_filter(tr->fops);
> > -		kfree(tr->fops);
> > -	}
> > +	direct_ops_free(tr);
> >  	kfree(tr);
> >  out:
> >  	mutex_unlock(&trampoline_mutex);
> > diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> > index 4661b9e606e0..1ad2e307c834 100644
> > --- a/kernel/trace/Kconfig
> > +++ b/kernel/trace/Kconfig
> > @@ -50,6 +50,9 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
> >  config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> >  	bool
> >  
> > +config HAVE_SINGLE_FTRACE_DIRECT_OPS
> > +	bool
> > +
> 
> Now you could add:
> 
>   config SINGLE_FTRACE_DIRECT_OPS
> 	bool
> 	default y
> 	depends on HAVE_SINGLE_FTRACE_DIRECT_OPS && DYNAMIC_FTRACE_WITH_DIRECT_CALLS

ok, the dependency is more obvious, will change

thanks,
jirka
Re: [PATCHv5 bpf-next 9/9] bpf,x86: Use single ftrace_ops for direct calls
Posted by Jiri Olsa 1 month, 1 week ago
On Fri, Dec 19, 2025 at 10:27:56AM +0100, Jiri Olsa wrote:

> > > diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> > > index 4661b9e606e0..1ad2e307c834 100644
> > > --- a/kernel/trace/Kconfig
> > > +++ b/kernel/trace/Kconfig
> > > @@ -50,6 +50,9 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
> > >  config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> > >  	bool
> > >  
> > > +config HAVE_SINGLE_FTRACE_DIRECT_OPS
> > > +	bool
> > > +
> > 
> > Now you could add:
> > 
> >   config SINGLE_FTRACE_DIRECT_OPS
> > 	bool
> > 	default y
> > 	depends on HAVE_SINGLE_FTRACE_DIRECT_OPS && DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> 
> ok, the dependency is more ovbvious, will change
> 
> thanks,
> jirka

actually, it seems that having it the original way with adding the rest
of the wrappers for !CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS case is
easier AFAICS

jirka


---
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 80527299f859..53bf2cf7ff6f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -336,6 +336,7 @@ config X86
 	select SCHED_SMT			if SMP
 	select ARCH_SUPPORTS_SCHED_CLUSTER	if SMP
 	select ARCH_SUPPORTS_SCHED_MC		if SMP
+	select HAVE_SINGLE_FTRACE_DIRECT_OPS	if X86_64 && DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index e5a0d58ed6dc..a8b3f510280a 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -33,12 +33,40 @@ static DEFINE_MUTEX(trampoline_mutex);
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
 
+#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
+static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
+{
+	struct hlist_head *head_ip;
+	struct bpf_trampoline *tr;
+
+	mutex_lock(&trampoline_mutex);
+	head_ip = &trampoline_ip_table[hash_64(ip, TRAMPOLINE_HASH_BITS)];
+	hlist_for_each_entry(tr, head_ip, hlist_ip) {
+		if (tr->ip == ip)
+			goto out;
+	}
+	tr = NULL;
+out:
+	mutex_unlock(&trampoline_mutex);
+	return tr;
+}
+#else
+static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
+{
+	return ops->private;
+}
+#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
+
 static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
 				     enum ftrace_ops_cmd cmd)
 {
-	struct bpf_trampoline *tr = ops->private;
+	struct bpf_trampoline *tr;
 	int ret = 0;
 
+	tr = direct_ops_ip_lookup(ops, ip);
+	if (!tr)
+		return -EINVAL;
+
 	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
 		/* This is called inside register_ftrace_direct_multi(), so
 		 * tr->mutex is already locked.
@@ -137,6 +165,159 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym)
 			   PAGE_SIZE, true, ksym->name);
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
+/*
+ * We have only single direct_ops which contains all the direct call
+ * sites and is the only global ftrace_ops for all trampolines.
+ *
+ * We use 'update_ftrace_direct_*' api for attachment.
+ */
+struct ftrace_ops direct_ops = {
+	.ops_func = bpf_tramp_ftrace_ops_func,
+};
+
+static int direct_ops_alloc(struct bpf_trampoline *tr)
+{
+	tr->fops = &direct_ops;
+	return 0;
+}
+
+static void direct_ops_free(struct bpf_trampoline *tr) { }
+
+static struct ftrace_hash *hash_from_ip(struct bpf_trampoline *tr, void *ptr)
+{
+	unsigned long ip, addr = (unsigned long) ptr;
+	struct ftrace_hash *hash;
+
+	ip = ftrace_location(tr->ip);
+	if (!ip)
+		return NULL;
+	hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+	if (!hash)
+		return NULL;
+	if (bpf_trampoline_use_jmp(tr->flags))
+		addr = ftrace_jmp_set(addr);
+	if (!add_ftrace_hash_entry_direct(hash, ip, addr)) {
+		free_ftrace_hash(hash);
+		return NULL;
+	}
+	return hash;
+}
+
+static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
+{
+	struct ftrace_hash *hash = hash_from_ip(tr, addr);
+	int err = -ENOMEM;
+
+	if (hash)
+		err = update_ftrace_direct_add(tr->fops, hash);
+	free_ftrace_hash(hash);
+	return err;
+}
+
+static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
+{
+	struct ftrace_hash *hash = hash_from_ip(tr, addr);
+	int err = -ENOMEM;
+
+	if (hash)
+		err = update_ftrace_direct_del(tr->fops, hash);
+	free_ftrace_hash(hash);
+	return err;
+}
+
+static int direct_ops_mod(struct bpf_trampoline *tr, void *addr, bool lock_direct_mutex)
+{
+	struct ftrace_hash *hash = hash_from_ip(tr, addr);
+	int err = -ENOMEM;
+
+	if (hash)
+		err = update_ftrace_direct_mod(tr->fops, hash, lock_direct_mutex);
+	free_ftrace_hash(hash);
+	return err;
+}
+#else
+/*
+ * We allocate ftrace_ops object for each trampoline and it contains
+ * call site specific for that trampoline.
+ *
+ * We use *_ftrace_direct api for attachment.
+ */
+static int direct_ops_alloc(struct bpf_trampoline *tr)
+{
+	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
+	if (!tr->fops)
+		return -ENOMEM;
+	tr->fops->private = tr;
+	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
+	return 0;
+}
+
+static void direct_ops_free(struct bpf_trampoline *tr)
+{
+	if (tr->fops) {
+		ftrace_free_filter(tr->fops);
+		kfree(tr->fops);
+	}
+}
+
+static int direct_ops_add(struct bpf_trampoline *tr, void *ptr)
+{
+	unsigned long addr = (unsigned long) ptr;
+	struct ftrace_ops *ops = tr->fops;
+	int ret;
+
+	if (bpf_trampoline_use_jmp(tr->flags))
+		addr = ftrace_jmp_set(addr);
+
+	ret = ftrace_set_filter_ip(ops, tr->ip, 0, 1);
+	if (ret)
+		return ret;
+	return register_ftrace_direct(ops, addr);
+}
+
+static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
+{
+	return unregister_ftrace_direct(tr->fops, (long)addr, false);
+}
+
+static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
+{
+	unsigned long addr = (unsigned long) ptr;
+	struct ftrace_ops *ops = tr->fops;
+
+	if (bpf_trampoline_use_jmp(tr->flags))
+		addr = ftrace_jmp_set(addr);
+	if (lock_direct_mutex)
+		return modify_ftrace_direct(ops, addr);
+	return modify_ftrace_direct_nolock(ops, addr);
+}
+#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
+#else
+static void direct_ops_free(struct bpf_trampoline *tr) { }
+
+static int direct_ops_alloc(struct bpf_trampoline *tr)
+{
+	return 0;
+}
+
+static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
+{
+	return -ENODEV;
+}
+
+static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
+{
+	return -ENODEV;
+}
+
+static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
 static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
 {
 	struct bpf_trampoline *tr;
@@ -154,16 +335,11 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
 	if (!tr)
 		goto out;
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
-	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
-	if (!tr->fops) {
+	if (direct_ops_alloc(tr)) {
 		kfree(tr);
 		tr = NULL;
 		goto out;
 	}
-	tr->fops->private = tr;
-	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
-#endif
 
 	tr->key = key;
 	tr->ip = ftrace_location(ip);
@@ -206,7 +382,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
 	int ret;
 
 	if (tr->func.ftrace_managed)
-		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
+		ret = direct_ops_del(tr, old_addr);
 	else
 		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);
 
@@ -220,15 +396,7 @@ static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
 	int ret;
 
 	if (tr->func.ftrace_managed) {
-		unsigned long addr = (unsigned long) new_addr;
-
-		if (bpf_trampoline_use_jmp(tr->flags))
-			addr = ftrace_jmp_set(addr);
-
-		if (lock_direct_mutex)
-			ret = modify_ftrace_direct(tr->fops, addr);
-		else
-			ret = modify_ftrace_direct_nolock(tr->fops, addr);
+		ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
 	} else {
 		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
 						   new_addr);
@@ -251,15 +419,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 	}
 
 	if (tr->func.ftrace_managed) {
-		unsigned long addr = (unsigned long) new_addr;
-
-		if (bpf_trampoline_use_jmp(tr->flags))
-			addr = ftrace_jmp_set(addr);
-
-		ret = ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
-		if (ret)
-			return ret;
-		ret = register_ftrace_direct(tr->fops, addr);
+		ret = direct_ops_add(tr, new_addr);
 	} else {
 		ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
 	}
@@ -910,10 +1070,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 	 */
 	hlist_del(&tr->hlist_key);
 	hlist_del(&tr->hlist_ip);
-	if (tr->fops) {
-		ftrace_free_filter(tr->fops);
-		kfree(tr->fops);
-	}
+	direct_ops_free(tr);
 	kfree(tr);
 out:
 	mutex_unlock(&trampoline_mutex);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index bfa2ec46e075..d7042a09fe46 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -50,6 +50,9 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
 config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 	bool
 
+config HAVE_SINGLE_FTRACE_DIRECT_OPS
+	bool
+
 config HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
 	bool
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 02030f62d737..4ed910d3d00d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2631,8 +2631,13 @@ unsigned long ftrace_find_rec_direct(unsigned long ip)
 static void call_direct_funcs(unsigned long ip, unsigned long pip,
 			      struct ftrace_ops *ops, struct ftrace_regs *fregs)
 {
-	unsigned long addr = READ_ONCE(ops->direct_call);
+	unsigned long addr;
 
+#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
+	addr = ftrace_find_rec_direct(ip);
+#else
+	addr = READ_ONCE(ops->direct_call);
+#endif
 	if (!addr)
 		return;
Re: [PATCHv5 bpf-next 9/9] bpf,x86: Use single ftrace_ops for direct calls
Posted by Steven Rostedt 1 month, 1 week ago
On Sun, 28 Dec 2025 16:22:18 +0100
Jiri Olsa <olsajiri@gmail.com> wrote:

> actualy, it seems that having it the original way with adding the rest
> of the wrappers for !CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS case is
> easier AFAICS

I'm fine either way.

-- Steve