[PATCH v7 08/17] unwind_user/deferred: Add unwind cache

Steven Rostedt posted 17 patches 9 months, 1 week ago
There is a newer version of this series
[PATCH v7 08/17] unwind_user/deferred: Add unwind cache
Posted by Steven Rostedt 9 months, 1 week ago
From: Josh Poimboeuf <jpoimboe@kernel.org>

Cache the results of the unwind to ensure the unwind is only performed
once, even when called by multiple tracers.

The cache nr_entries gets cleared every time the task exits the kernel.
When a stacktrace is requested, nr_entries gets set to the number of
entries in the stacktrace. If another stacktrace is requested, if
nr_entries is not zero, then it contains the same stacktrace that would be
retrieved so it is not processed again and the entries is given to the
caller.

Co-developed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/entry-common.h          |  2 ++
 include/linux/unwind_deferred.h       |  7 +++++++
 include/linux/unwind_deferred_types.h |  7 ++++++-
 kernel/unwind/deferred.c              | 27 ++++++++++++++++++++-------
 4 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index f94f3fdf15fc..6e850c9d3f0c 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -12,6 +12,7 @@
 #include <linux/resume_user_mode.h>
 #include <linux/tick.h>
 #include <linux/kmsan.h>
+#include <linux/unwind_deferred.h>
 
 #include <asm/entry-common.h>
 #include <asm/syscall.h>
@@ -362,6 +363,7 @@ static __always_inline void exit_to_user_mode(void)
 	lockdep_hardirqs_on_prepare();
 	instrumentation_end();
 
+	unwind_exit_to_user_mode();
 	user_enter_irqoff();
 	arch_exit_to_user_mode();
 	lockdep_hardirqs_on(CALLER_ADDR0);
diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h
index 5064ebe38c4f..c2d760e5e257 100644
--- a/include/linux/unwind_deferred.h
+++ b/include/linux/unwind_deferred.h
@@ -12,6 +12,11 @@ void unwind_task_free(struct task_struct *task);
 
 int unwind_deferred_trace(struct unwind_stacktrace *trace);
 
+static __always_inline void unwind_exit_to_user_mode(void)
+{
+	current->unwind_info.cache.nr_entries = 0;
+}
+
 #else /* !CONFIG_UNWIND_USER */
 
 static inline void unwind_task_init(struct task_struct *task) {}
@@ -19,6 +24,8 @@ static inline void unwind_task_free(struct task_struct *task) {}
 
 static inline int unwind_deferred_trace(struct unwind_stacktrace *trace) { return -ENOSYS; }
 
+static inline void unwind_exit_to_user_mode(void) {}
+
 #endif /* !CONFIG_UNWIND_USER */
 
 #endif /* _LINUX_UNWIND_USER_DEFERRED_H */
diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
index aa32db574e43..b3b7389ee6eb 100644
--- a/include/linux/unwind_deferred_types.h
+++ b/include/linux/unwind_deferred_types.h
@@ -2,8 +2,13 @@
 #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
 #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
 
-struct unwind_task_info {
+struct unwind_cache {
 	unsigned long		*entries;
+	unsigned int		nr_entries;
+};
+
+struct unwind_task_info {
+	struct unwind_cache	cache;
 };
 
 #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
index 5a3789e38c00..89ed04b1c527 100644
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -12,6 +12,7 @@
 int unwind_deferred_trace(struct unwind_stacktrace *trace)
 {
 	struct unwind_task_info *info = &current->unwind_info;
+	struct unwind_cache *cache = &info->cache;
 
 	/* Should always be called from faultable context */
 	might_fault();
@@ -19,17 +20,29 @@ int unwind_deferred_trace(struct unwind_stacktrace *trace)
 	if (current->flags & PF_EXITING)
 		return -EINVAL;
 
-       if (!info->entries) {
-               info->entries = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(long),
-                                             GFP_KERNEL);
-               if (!info->entries)
-		       return -ENOMEM;
+	if (!cache->entries) {
+		cache->entries = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(long),
+					       GFP_KERNEL);
+		if (!cache->entries)
+			return -ENOMEM;
+        }
+
+	trace->entries = cache->entries;
+
+	if (cache->nr_entries) {
+               /*
+                * The user stack has already been previously unwound in this
+                * entry context.  Skip the unwind and use the cache.
+                */
+               trace->nr = cache->nr_entries;
+               return 0;
        }
 
 	trace->nr = 0;
-	trace->entries = info->entries;
 	unwind_user(trace, UNWIND_MAX_ENTRIES);
 
+	cache->nr_entries = trace->nr;
+
 	return 0;
 }
 
@@ -44,5 +57,5 @@ void unwind_task_free(struct task_struct *task)
 {
 	struct unwind_task_info *info = &task->unwind_info;
 
-	kfree(info->entries);
+	kfree(info->cache.entries);
 }
-- 
2.47.2
Re: [PATCH v7 08/17] unwind_user/deferred: Add unwind cache
Posted by Ingo Molnar 9 months, 1 week ago
* Steven Rostedt <rostedt@goodmis.org> wrote:

> -struct unwind_task_info {
> +struct unwind_cache {
>  	unsigned long		*entries;
> +	unsigned int		nr_entries;
> +};
> +
> +struct unwind_task_info {
> +	struct unwind_cache	cache;
>  };

> @@ -19,17 +20,29 @@ int unwind_deferred_trace(struct unwind_stacktrace *trace)
>  	if (current->flags & PF_EXITING)
>  		return -EINVAL;
>  
> -       if (!info->entries) {
> -               info->entries = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(long),
> -                                             GFP_KERNEL);
> -               if (!info->entries)
> -		       return -ENOMEM;
> +	if (!cache->entries) {
> +		cache->entries = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(long),
> +					       GFP_KERNEL);
> +		if (!cache->entries)
> +			return -ENOMEM;
> +        }

That's just sloppy: why isn't the unwind_cache a pointer to begin with, 
with the dynamically allocated object containing ::nr_entries?

Also, the code has whitespace damage.

> +
> +	trace->entries = cache->entries;
> +
> +	if (cache->nr_entries) {
> +               /*
> +                * The user stack has already been previously unwound in this
> +                * entry context.  Skip the unwind and use the cache.
> +                */
> +               trace->nr = cache->nr_entries;
> +               return 0;

Whitespace damage here too. Maybe in other patches as well.

Please don't rush this series without first reviewing it carefully ...

Thanks,

	Ingo
Re: [PATCH v7 08/17] unwind_user/deferred: Add unwind cache
Posted by Steven Rostedt 9 months, 1 week ago
On Sun, 4 May 2025 11:37:54 +0200
Ingo Molnar <mingo@kernel.org> wrote:

Hi Ingo,

> * Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > -struct unwind_task_info {
> > +struct unwind_cache {
> >  	unsigned long		*entries;
> > +	unsigned int		nr_entries;
> > +};
> > +
> > +struct unwind_task_info {
> > +	struct unwind_cache	cache;
> >  };  
> 
> > @@ -19,17 +20,29 @@ int unwind_deferred_trace(struct unwind_stacktrace *trace)
> >  	if (current->flags & PF_EXITING)
> >  		return -EINVAL;
> >  
> > -       if (!info->entries) {
> > -               info->entries = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(long),
> > -                                             GFP_KERNEL);
> > -               if (!info->entries)
> > -		       return -ENOMEM;
> > +	if (!cache->entries) {
> > +		cache->entries = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(long),
> > +					       GFP_KERNEL);
> > +		if (!cache->entries)
> > +			return -ENOMEM;
> > +        }  
> 
> That's just sloppy: why isn't the unwind_cache a pointer to begin with, 
> with the dynamically allocated object containing ::nr_entries?

Basically you want?

struct unwind_task_info {
	struct unwind_cache	*cache;
};

Then have:

struct unwind_cache {
	int			nr_entries;
	unsigned long		entries[];
};

And allocate the unwind_cache to include the size (using the dynamic
structure macro helpers)?

That makes sense to me.

> 
> Also, the code has whitespace damage.
> 
> > +
> > +	trace->entries = cache->entries;
> > +
> > +	if (cache->nr_entries) {
> > +               /*
> > +                * The user stack has already been previously
> > unwound in this
> > +                * entry context.  Skip the unwind and use the
> > cache.
> > +                */
> > +               trace->nr = cache->nr_entries;
> > +               return 0;  
> 
> Whitespace damage here too. Maybe in other patches as well.
> 
> Please don't rush this series without first reviewing it carefully ...

Hmm, For whitespace damage, I usually rely on git show to show me where
whitespace damage is. But if there's no tabs, then it doesn't show. The
whitespace damage came from when I pulled in Josh's work and rebased it
on the latest kernel. There were conflicts which was solved by having
to do some cut and paste from .rej files, and somehow it added spaces
instead of tabs.

Peter caught this is the beginning, and I thought I got all the
locations that had that white space issue. This patch hasn't been
touched much during the various versions.

I'll do a search for spaces to see if there's more in any of the other
patches.

Thanks!

-- Steve