Adding support for a special mapping for the user space trampoline, with the
following functions:
  get_uprobe_trampoline - find or add uprobe_trampoline
  destroy_uprobe_trampoline - remove and free uprobe_trampoline
The user space trampoline is exported as an arch specific user space special
mapping through tramp_mapping, which is initialized in the following changes
with the new uprobe syscall.
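For illustration only, a rough sketch of how the page backing tramp_mapping
could be populated; the actual initialization is done in the following uprobe
syscall changes, and the tramp_mapping_init name, the GFP flags and the
uprobe_trampoline_code/uprobe_trampoline_size symbols below are made-up
placeholders:

/*
 * Hypothetical sketch, not part of this patch: allocate the page that
 * backs tramp_mapping and copy some trampoline code into it.
 */
static int __init tramp_mapping_init(void)
{
	void *kaddr;

	tramp_mapping_pages[0] = alloc_page(GFP_HIGHUSER);
	if (!tramp_mapping_pages[0])
		return -ENOMEM;

	kaddr = kmap_local_page(tramp_mapping_pages[0]);
	memcpy(kaddr, uprobe_trampoline_code, uprobe_trampoline_size);
	kunmap_local(kaddr);
	return 0;
}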
The uprobe trampoline needs to be callable/reachable from the probed address,
so while searching for an available address we use the is_reachable_by_call
function to decide whether the uprobe trampoline is callable from the probe
address.
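To make the reachability requirement concrete: the probed instruction is
expected (in the following changes) to be rewritten into a 5-byte call whose
rel32 displacement is measured from the end of the call to the trampoline,
hence the vaddr + 5 in is_reachable_by_call below. A hypothetical sketch of
that encoding, with encode_call being a made-up helper:

/*
 * Illustrative sketch, not part of this patch: encode a 5-byte
 * "call rel32" from the probed address vaddr to the trampoline vtramp.
 * Only valid when is_reachable_by_call(vtramp, vaddr) holds.
 */
static void encode_call(u8 *insn, unsigned long vaddr, unsigned long vtramp)
{
	s32 rel = (s32)(vtramp - (vaddr + 5));	/* target - end of the call */

	insn[0] = 0xe8;				/* CALL rel32 opcode */
	memcpy(&insn[1], &rel, sizeof(rel));
}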
All uprobe_trampoline objects are stored in the uprobes_state object and are
cleaned up when the process mm_struct goes down. New arch hooks are added for
that, because this change is x86_64 specific.
Locking is provided by the callers in the following changes.
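A hypothetical caller sketch (the real callers and their exact locking arrive
in the following changes; example_optimize is a made-up name and the use of
mmap_write_lock here is an assumption):

/*
 * Hypothetical caller sketch, not part of this patch: the trampoline
 * list in mm->uprobes_state is expected to be accessed with suitable
 * mm locking provided by the caller.
 */
static int example_optimize(unsigned long vaddr)
{
	struct uprobe_trampoline *tramp;
	bool new = false;

	mmap_write_lock(current->mm);
	tramp = get_uprobe_trampoline(vaddr, &new);
	mmap_write_unlock(current->mm);

	return tramp ? 0 : -ENOMEM;
}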
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
arch/x86/kernel/uprobes.c | 144 ++++++++++++++++++++++++++++++++++++++
include/linux/uprobes.h | 6 ++
kernel/events/uprobes.c | 10 +++
kernel/fork.c | 1 +
4 files changed, 161 insertions(+)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 77050e5a4680..6c4dcbdd0c3c 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -608,6 +608,150 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
*sr = utask->autask.saved_scratch_register;
}
}
+
+static int tramp_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
+{
+ return -EPERM;
+}
+
+static struct page *tramp_mapping_pages[2] __ro_after_init;
+
+static struct vm_special_mapping tramp_mapping = {
+ .name = "[uprobes-trampoline]",
+ .mremap = tramp_mremap,
+ .pages = tramp_mapping_pages,
+};
+
+struct uprobe_trampoline {
+ struct hlist_node node;
+ unsigned long vaddr;
+};
+
+static bool is_reachable_by_call(unsigned long vtramp, unsigned long vaddr)
+{
+ long delta = (long)(vaddr + 5 - vtramp);
+
+ return delta >= INT_MIN && delta <= INT_MAX;
+}
+
+static unsigned long find_nearest_trampoline(unsigned long vaddr)
+{
+ struct vm_unmapped_area_info info = {
+ .length = PAGE_SIZE,
+ .align_mask = ~PAGE_MASK,
+ };
+ unsigned long low_limit, high_limit;
+ unsigned long low_tramp, high_tramp;
+ unsigned long call_end = vaddr + 5;
+
+ if (check_add_overflow(call_end, INT_MIN, &low_limit))
+ low_limit = PAGE_SIZE;
+
+ high_limit = call_end + INT_MAX;
+
+ /* Search up from the caller address. */
+ info.low_limit = call_end;
+ info.high_limit = min(high_limit, TASK_SIZE);
+ high_tramp = vm_unmapped_area(&info);
+
+ /* Search down from the caller address. */
+ info.low_limit = max(low_limit, PAGE_SIZE);
+ info.high_limit = call_end;
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ low_tramp = vm_unmapped_area(&info);
+
+ if (IS_ERR_VALUE(high_tramp) && IS_ERR_VALUE(low_tramp))
+ return -ENOMEM;
+ if (IS_ERR_VALUE(high_tramp))
+ return low_tramp;
+ if (IS_ERR_VALUE(low_tramp))
+ return high_tramp;
+
+ /* Return address that's closest to the caller address. */
+ if (call_end - low_tramp < high_tramp - call_end)
+ return low_tramp;
+ return high_tramp;
+}
+
+static struct uprobe_trampoline *create_uprobe_trampoline(unsigned long vaddr)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+ struct mm_struct *mm = current->mm;
+ struct uprobe_trampoline *tramp;
+ struct vm_area_struct *vma;
+
+ if (!user_64bit_mode(regs))
+ return NULL;
+
+ vaddr = find_nearest_trampoline(vaddr);
+ if (IS_ERR_VALUE(vaddr))
+ return NULL;
+
+ tramp = kzalloc(sizeof(*tramp), GFP_KERNEL);
+ if (unlikely(!tramp))
+ return NULL;
+
+ tramp->vaddr = vaddr;
+ vma = _install_special_mapping(mm, tramp->vaddr, PAGE_SIZE,
+ VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_DONTCOPY|VM_IO,
+ &tramp_mapping);
+ if (IS_ERR(vma)) {
+ kfree(tramp);
+ return NULL;
+ }
+ return tramp;
+}
+
+__maybe_unused
+static struct uprobe_trampoline *get_uprobe_trampoline(unsigned long vaddr, bool *new)
+{
+ struct uprobes_state *state = &current->mm->uprobes_state;
+ struct uprobe_trampoline *tramp = NULL;
+
+ if (vaddr > TASK_SIZE || vaddr < PAGE_SIZE)
+ return NULL;
+
+ hlist_for_each_entry(tramp, &state->head_tramps, node) {
+ if (is_reachable_by_call(tramp->vaddr, vaddr)) {
+ *new = false;
+ return tramp;
+ }
+ }
+
+ tramp = create_uprobe_trampoline(vaddr);
+ if (!tramp)
+ return NULL;
+
+ *new = true;
+ hlist_add_head(&tramp->node, &state->head_tramps);
+ return tramp;
+}
+
+static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
+{
+ /*
+ * We do not unmap and release uprobe trampoline page itself,
+ * because there's no easy way to make sure none of the threads
+ * is still inside the trampoline.
+ */
+ hlist_del(&tramp->node);
+ kfree(tramp);
+}
+
+void arch_uprobe_init_state(struct mm_struct *mm)
+{
+ INIT_HLIST_HEAD(&mm->uprobes_state.head_tramps);
+}
+
+void arch_uprobe_clear_state(struct mm_struct *mm)
+{
+ struct uprobes_state *state = &mm->uprobes_state;
+ struct uprobe_trampoline *tramp;
+ struct hlist_node *n;
+
+ hlist_for_each_entry_safe(tramp, n, &state->head_tramps, node)
+ destroy_uprobe_trampoline(tramp);
+}
#else /* 32-bit: */
/*
* No RIP-relative addressing on 32-bit
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 5080619560d4..b40d33aae016 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -17,6 +17,7 @@
#include <linux/wait.h>
#include <linux/timer.h>
#include <linux/seqlock.h>
+#include <linux/mutex.h>
struct uprobe;
struct vm_area_struct;
@@ -185,6 +186,9 @@ struct xol_area;
struct uprobes_state {
struct xol_area *xol_area;
+#ifdef CONFIG_X86_64
+ struct hlist_head head_tramps;
+#endif
};
typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr,
@@ -233,6 +237,8 @@ extern void uprobe_handle_trampoline(struct pt_regs *regs);
extern void *arch_uretprobe_trampoline(unsigned long *psize);
extern unsigned long uprobe_get_trampoline_vaddr(void);
extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len);
+extern void arch_uprobe_clear_state(struct mm_struct *mm);
+extern void arch_uprobe_init_state(struct mm_struct *mm);
#else /* !CONFIG_UPROBES */
struct uprobes_state {
};
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6795b8d82b9c..acec91a676b7 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1802,6 +1802,14 @@ static struct xol_area *get_xol_area(void)
return area;
}
+void __weak arch_uprobe_clear_state(struct mm_struct *mm)
+{
+}
+
+void __weak arch_uprobe_init_state(struct mm_struct *mm)
+{
+}
+
/*
* uprobe_clear_state - Free the area allocated for slots.
*/
@@ -1813,6 +1821,8 @@ void uprobe_clear_state(struct mm_struct *mm)
delayed_uprobe_remove(NULL, mm);
mutex_unlock(&delayed_uprobe_lock);
+ arch_uprobe_clear_state(mm);
+
if (!area)
return;
diff --git a/kernel/fork.c b/kernel/fork.c
index 1882d06b9138..78ab2009576c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1009,6 +1009,7 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
{
#ifdef CONFIG_UPROBES
mm->uprobes_state.xol_area = NULL;
+ arch_uprobe_init_state(mm);
#endif
}
--
2.50.1
On Sun, Jul 20, 2025 at 01:21:18PM +0200, Jiri Olsa wrote:
> +static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
> +{
> +	/*
> +	 * We do not unmap and release uprobe trampoline page itself,
> +	 * because there's no easy way to make sure none of the threads
> +	 * is still inside the trampoline.
> +	 */
> +	hlist_del(&tramp->node);
> +	kfree(tramp);
> +}

I am somewhat confused; isn't this called from
__mmput()->uprobe_clear_state()->arch_uprobe_clear_state ?

At that time we don't have threads anymore and mm is about to be
destroyed anyway.
On Tue, Aug 19, 2025 at 04:53:45PM +0200, Peter Zijlstra wrote:
> On Sun, Jul 20, 2025 at 01:21:18PM +0200, Jiri Olsa wrote:
> > +static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
> > +{
> > +	/*
> > +	 * We do not unmap and release uprobe trampoline page itself,
> > +	 * because there's no easy way to make sure none of the threads
> > +	 * is still inside the trampoline.
> > +	 */
> > +	hlist_del(&tramp->node);
> > +	kfree(tramp);
> > +}
>
> I am somewhat confused; isn't this called from
> __mmput()->uprobe_clear_state()->arch_uprobe_clear_state ?
>
> At that time we don't have threads anymore and mm is about to be
> destroyed anyway.

ah you mean the comment, right? we need to release tramp objects

the comment is leftover from some of the previous version, it can be removed, sorry

jirka
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 91440ff4cafad4c86322a612e523f7f021a493e7
Gitweb: https://git.kernel.org/tip/91440ff4cafad4c86322a612e523f7f021a493e7
Author: Jiri Olsa <jolsa@kernel.org>
AuthorDate: Sun, 20 Jul 2025 13:21:18 +02:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 21 Aug 2025 20:09:20 +02:00
uprobes/x86: Add mapping for optimized uprobe trampolines
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20250720112133.244369-9-jolsa@kernel.org