From: Josh Poimboeuf <jpoimboe@kernel.org>
Cache the results of the unwind to ensure the unwind is only performed
once, even when called by multiple tracers.

The cache's nr_entries field gets cleared every time the task exits the
kernel. When a stacktrace is requested, nr_entries is set to the number of
entries in that stacktrace. If another stacktrace is requested while
nr_entries is still non-zero, the cache already holds the same stacktrace
that a new unwind would produce, so the unwind is skipped and the cached
entries are handed to the caller.
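
In outline, the resulting flow looks like this (a sketch for illustration,
not literal code):

    exit_to_user_mode():
        cache->nr_entries = 0;                   /* invalidate on kernel exit */

    unwind_user_faultable():
        if (cache->nr_entries)                   /* cache hit: reuse entries */
            return the cached trace;
        unwind_user(trace, UNWIND_MAX_ENTRIES);  /* cache miss: do the unwind */
        cache->nr_entries = trace->nr;           /* fill cache for later callers */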
Co-developed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
include/linux/entry-common.h | 2 ++
include/linux/unwind_deferred.h | 8 +++++++
include/linux/unwind_deferred_types.h | 7 +++++-
kernel/unwind/deferred.c | 31 +++++++++++++++++++++------
4 files changed, 40 insertions(+), 8 deletions(-)
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index f94f3fdf15fc..8908b8eeb99b 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -12,6 +12,7 @@
#include <linux/resume_user_mode.h>
#include <linux/tick.h>
#include <linux/kmsan.h>
+#include <linux/unwind_deferred.h>
#include <asm/entry-common.h>
#include <asm/syscall.h>
@@ -362,6 +363,7 @@ static __always_inline void exit_to_user_mode(void)
lockdep_hardirqs_on_prepare();
instrumentation_end();
+ unwind_reset_info();
user_enter_irqoff();
arch_exit_to_user_mode();
lockdep_hardirqs_on(CALLER_ADDR0);
diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h
index a5f6e8f8a1a2..baacf4a1eb4c 100644
--- a/include/linux/unwind_deferred.h
+++ b/include/linux/unwind_deferred.h
@@ -12,6 +12,12 @@ void unwind_task_free(struct task_struct *task);
int unwind_user_faultable(struct unwind_stacktrace *trace);
+static __always_inline void unwind_reset_info(void)
+{
+ if (unlikely(current->unwind_info.cache))
+ current->unwind_info.cache->nr_entries = 0;
+}
+
#else /* !CONFIG_UNWIND_USER */
static inline void unwind_task_init(struct task_struct *task) {}
@@ -19,6 +25,8 @@ static inline void unwind_task_free(struct task_struct *task) {}
static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; }
+static inline void unwind_reset_info(void) {}
+
#endif /* !CONFIG_UNWIND_USER */
#endif /* _LINUX_UNWIND_USER_DEFERRED_H */
diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
index aa32db574e43..db5b54b18828 100644
--- a/include/linux/unwind_deferred_types.h
+++ b/include/linux/unwind_deferred_types.h
@@ -2,8 +2,13 @@
#ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
#define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
+struct unwind_cache {
+ unsigned int nr_entries;
+ unsigned long entries[];
+};
+
struct unwind_task_info {
- unsigned long *entries;
+ struct unwind_cache *cache;
};
#endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
index a0badbeb3cc1..96368a5aa522 100644
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -4,10 +4,13 @@
*/
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/unwind_deferred.h>
-#define UNWIND_MAX_ENTRIES 512
+/* Make the cache fit in a 4K page */
+#define UNWIND_MAX_ENTRIES \
+ ((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))
/**
* unwind_user_faultable - Produce a user stacktrace in faultable context
@@ -24,6 +27,7 @@
int unwind_user_faultable(struct unwind_stacktrace *trace)
{
struct unwind_task_info *info = &current->unwind_info;
+ struct unwind_cache *cache;
/* Should always be called from faultable context */
might_fault();
@@ -31,17 +35,30 @@ int unwind_user_faultable(struct unwind_stacktrace *trace)
if (current->flags & PF_EXITING)
return -EINVAL;
- if (!info->entries) {
- info->entries = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(long),
- GFP_KERNEL);
- if (!info->entries)
+ if (!info->cache) {
+ info->cache = kzalloc(struct_size(cache, entries, UNWIND_MAX_ENTRIES),
+ GFP_KERNEL);
+ if (!info->cache)
return -ENOMEM;
}
+ cache = info->cache;
+ trace->entries = cache->entries;
+
+ if (cache->nr_entries) {
+ /*
+ * The user stack has already been previously unwound in this
+ * entry context. Skip the unwind and use the cache.
+ */
+ trace->nr = cache->nr_entries;
+ return 0;
+ }
+
trace->nr = 0;
- trace->entries = info->entries;
unwind_user(trace, UNWIND_MAX_ENTRIES);
+ cache->nr_entries = trace->nr;
+
return 0;
}
@@ -56,5 +73,5 @@ void unwind_task_free(struct task_struct *task)
{
struct unwind_task_info *info = &task->unwind_info;
- kfree(info->entries);
+ kfree(info->cache);
}
--
2.47.2
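
For context, a minimal sketch of a tracer-side caller hitting the cache;
tracer_sample_user_stack() and consume_entries() are hypothetical names
used only for illustration, while unwind_user_faultable() and the trace
fields are taken from the patch above:

	/* Hypothetical caller, for illustration only. */
	static void tracer_sample_user_stack(void)
	{
		struct unwind_stacktrace trace;

		/*
		 * The first call in this entry context performs the real
		 * unwind and fills current->unwind_info.cache.
		 */
		if (unwind_user_faultable(&trace))
			return;

		/*
		 * Any later call before the task returns to user space sees
		 * cache->nr_entries != 0 and reuses the cached entries.
		 */
		consume_entries(trace.entries, trace.nr);	/* hypothetical */
	}

Each subsequent tracer in the same entry context then pays only the cost of
the cache check, since the cache is only invalidated when exit_to_user_mode()
calls unwind_reset_info().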
On 25.07.2025 20:55, Steven Rostedt wrote:
> From: Josh Poimboeuf <jpoimboe@kernel.org>
>
> Cache the results of the unwind to ensure the unwind is only performed
> once, even when called by multiple tracers.
>
> The cache nr_entries gets cleared every time the task exits the kernel.
> When a stacktrace is requested, nr_entries gets set to the number of
> entries in the stacktrace. If another stacktrace is requested, if
> nr_entries is not zero, then it contains the same stacktrace that would be
> retrieved so it is not processed again and the entries is given to the
> caller.
>
> Co-developed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

Reviewed-by: Jens Remus <jremus@linux.ibm.com>

> diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c

> +	cache = info->cache;
> +	trace->entries = cache->entries;
> +
> +	if (cache->nr_entries) {
> +		/*
> +		 * The user stack has already been previously unwound in this
> +		 * entry context. Skip the unwind and use the cache.
> +		 */
> +		trace->nr = cache->nr_entries;
> +		return 0;
> +	}
> +
> 	trace->nr = 0;
> -	trace->entries = info->entries;
> 	unwind_user(trace, UNWIND_MAX_ENTRIES);
>
> +	cache->nr_entries = trace->nr;
> +

Would the following alternative to above excerpt be easier to read?

	/* Use the cache, if the user stack has already been previously
	 * unwound in this entry context. If not this will initialize
	 * trace->nr to zero to trigger the unwind now.
	 */
	cache = info->cache;
	trace->nr = cache->nr_entries;
	trace->entries = cache->entries;

	if (!trace->nr) {
		unwind_user(trace, UNWIND_MAX_ENTRIES);
		cache->nr_entries = trace->nr;
	}

> 	return 0;
> }

Regards,
Jens

--
Jens Remus
Linux on Z Development (D3303)
+49-7031-16-1128 Office
jremus@de.ibm.com
On Mon, 28 Jul 2025 17:46:42 +0200
Jens Remus <jremus@linux.ibm.com> wrote:

> On 25.07.2025 20:55, Steven Rostedt wrote:
> > From: Josh Poimboeuf <jpoimboe@kernel.org>
> >
> > Cache the results of the unwind to ensure the unwind is only performed
> > once, even when called by multiple tracers.
> >
> > The cache nr_entries gets cleared every time the task exits the kernel.
> > When a stacktrace is requested, nr_entries gets set to the number of
> > entries in the stacktrace. If another stacktrace is requested, if
> > nr_entries is not zero, then it contains the same stacktrace that would be
> > retrieved so it is not processed again and the entries is given to the
> > caller.
> >
> > Co-developed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> > Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
> > Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
>
> Reviewed-by: Jens Remus <jremus@linux.ibm.com>

Thanks.

> > diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
>
> > +	cache = info->cache;
> > +	trace->entries = cache->entries;
> > +
> > +	if (cache->nr_entries) {
> > +		/*
> > +		 * The user stack has already been previously unwound in this
> > +		 * entry context. Skip the unwind and use the cache.
> > +		 */
> > +		trace->nr = cache->nr_entries;
> > +		return 0;
> > +	}
> > +
> > 	trace->nr = 0;
> > -	trace->entries = info->entries;
> > 	unwind_user(trace, UNWIND_MAX_ENTRIES);
> >
> > +	cache->nr_entries = trace->nr;
> > +
>
> Would the following alternative to above excerpt be easier to read?

Not to me ;-)

I looked at this and read it a couple of times, but had to go back to see
what it was replacing before I understood it. I prefer the original. Its
logic is "if this was already done, just return the cache", whereas the
below logic is "assign everything; if it hasn't been done, do it now".

Maybe it's just my own preference, but I'm more comfortable with the "if
it's already been done, exit out early" approach than with the "set
everything up, and do it if it hasn't been done" approach.

-- Steve

> 	/* Use the cache, if the user stack has already been previously
> 	 * unwound in this entry context. If not this will initialize
> 	 * trace->nr to zero to trigger the unwind now.
> 	 */
> 	cache = info->cache;
> 	trace->nr = cache->nr_entries;
> 	trace->entries = cache->entries;
>
> 	if (!trace->nr) {
> 		unwind_user(trace, UNWIND_MAX_ENTRIES);
> 		cache->nr_entries = trace->nr;
> 	}
>
> > 	return 0;
> > }
>
> Regards,
> Jens