[PATCH 8/8] tracing: Update modules to persistent instances when loaded

Steven Rostedt posted 8 patches 10 months, 1 week ago
There is a newer version of this series
[PATCH 8/8] tracing: Update modules to persistent instances when loaded
Posted by Steven Rostedt 10 months, 1 week ago
From: Steven Rostedt <rostedt@goodmis.org>

When a module is loaded and a persistent buffer is actively tracing, add
it to the list of modules in the persistent memory.

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c        | 25 +++++++++++++++++++++++
 kernel/trace/trace.h        |  2 ++
 kernel/trace/trace_events.c | 40 ++++++++++++++++++++++++++-----------
 3 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7b4027804cd4..443f2bc5b856 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10088,6 +10088,30 @@ static void trace_module_remove_evals(struct module *mod)
 static inline void trace_module_remove_evals(struct module *mod) { }
 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 
+static bool trace_array_active(struct trace_array *tr)
+{
+	if (tr->current_trace != &nop_trace)
+		return true;
+
+	/* 0 is no events, 1 is all disabled */
+	return trace_events_enabled(tr, NULL) > 1;
+}
+
+static void trace_module_record(struct module *mod)
+{
+	struct trace_array *tr;
+
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		/* Update any persistent trace array that has already been started */
+		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
+		    TRACE_ARRAY_FL_BOOT) {
+			/* Only update if the trace array is active */
+			if (trace_array_active(tr))
+				save_mod(mod, tr);
+		}
+	}
+}
+
 static int trace_module_notify(struct notifier_block *self,
 			       unsigned long val, void *data)
 {
@@ -10096,6 +10120,7 @@ static int trace_module_notify(struct notifier_block *self,
 	switch (val) {
 	case MODULE_STATE_COMING:
 		trace_module_add_evals(mod);
+		trace_module_record(mod);
 		break;
 	case MODULE_STATE_GOING:
 		trace_module_remove_evals(mod);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3a020fb82a34..90493220c362 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -786,6 +786,8 @@ extern void trace_find_cmdline(int pid, char comm[]);
 extern int trace_find_tgid(int pid);
 extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
 
+extern int trace_events_enabled(struct trace_array *tr, const char *system);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern unsigned long ftrace_update_tot_cnt;
 extern unsigned long ftrace_number_of_pages;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4cb275316e51..107767afe0ab 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1811,28 +1811,28 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	return cnt;
 }
 
-static ssize_t
-system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
-		   loff_t *ppos)
+/*
+ * Returns:
+ *   0 : no events exist?
+ *   1 : all events are disabled
+ *   2 : all events are enabled
+ *   3 : some events are enabled and some are enabled
+ */
+int trace_events_enabled(struct trace_array *tr, const char *system)
 {
-	const char set_to_char[4] = { '?', '0', '1', 'X' };
-	struct trace_subsystem_dir *dir = filp->private_data;
-	struct event_subsystem *system = dir->subsystem;
 	struct trace_event_call *call;
 	struct trace_event_file *file;
-	struct trace_array *tr = dir->tr;
-	char buf[2];
 	int set = 0;
-	int ret;
 
-	mutex_lock(&event_mutex);
+	guard(mutex)(&event_mutex);
+
 	list_for_each_entry(file, &tr->events, list) {
 		call = file->event_call;
 		if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
 		    !trace_event_name(call) || !call->class || !call->class->reg)
 			continue;
 
-		if (system && strcmp(call->class->system, system->name) != 0)
+		if (system && strcmp(call->class->system, system) != 0)
 			continue;
 
 		/*
@@ -1848,7 +1848,23 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 		if (set == 3)
 			break;
 	}
-	mutex_unlock(&event_mutex);
+
+	return set;
+}
+
+static ssize_t
+system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
+		   loff_t *ppos)
+{
+	const char set_to_char[4] = { '?', '0', '1', 'X' };
+	struct trace_subsystem_dir *dir = filp->private_data;
+	struct event_subsystem *system = dir->subsystem;
+	struct trace_array *tr = dir->tr;
+	char buf[2];
+	int set;
+	int ret;
+
+	set = trace_events_enabled(tr, system ? system->name : NULL);
 
 	buf[0] = set_to_char[set];
 	buf[1] = '\n';
-- 
2.45.2
Re: [PATCH 8/8] tracing: Update modules to persistent instances when loaded
Posted by Masami Hiramatsu (Google) 10 months, 1 week ago
On Wed, 05 Feb 2025 17:50:39 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> From: Steven Rostedt <rostedt@goodmis.org>
> 
> When a module is loaded and a persistent buffer is actively tracing, add
> it to the list of modules in the persistent memory.
> 
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---
>  kernel/trace/trace.c        | 25 +++++++++++++++++++++++
>  kernel/trace/trace.h        |  2 ++
>  kernel/trace/trace_events.c | 40 ++++++++++++++++++++++++++-----------
>  3 files changed, 55 insertions(+), 12 deletions(-)
> 
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 7b4027804cd4..443f2bc5b856 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -10088,6 +10088,30 @@ static void trace_module_remove_evals(struct module *mod)
>  static inline void trace_module_remove_evals(struct module *mod) { }
>  #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
>  
> +static bool trace_array_active(struct trace_array *tr)
> +{
> +	if (tr->current_trace != &nop_trace)
> +		return true;
> +
> +	/* 0 is no events, 1 is all disabled */
> +	return trace_events_enabled(tr, NULL) > 1;
> +}
> +
> +static void trace_module_record(struct module *mod)
> +{
> +	struct trace_array *tr;
> +
> +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> +		/* Update any persistent trace array that has already been started */
> +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> +		    TRACE_ARRAY_FL_BOOT) {
> +			/* Only update if the trace array is active */
> +			if (trace_array_active(tr))

Do we really need this check? It seems that we can just save_mod() if the
above condition is true.

> +				save_mod(mod, tr);
> +		}
> +	}
> +}
> +
>  static int trace_module_notify(struct notifier_block *self,
>  			       unsigned long val, void *data)
>  {
> @@ -10096,6 +10120,7 @@ static int trace_module_notify(struct notifier_block *self,
>  	switch (val) {
>  	case MODULE_STATE_COMING:
>  		trace_module_add_evals(mod);
> +		trace_module_record(mod);
>  		break;
>  	case MODULE_STATE_GOING:
>  		trace_module_remove_evals(mod);

Don't we need to remove the module entry when a module is removed?
(everytime we remove a module, trace data is cleared?)

Thank you,

> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 3a020fb82a34..90493220c362 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -786,6 +786,8 @@ extern void trace_find_cmdline(int pid, char comm[]);
>  extern int trace_find_tgid(int pid);
>  extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
>  
> +extern int trace_events_enabled(struct trace_array *tr, const char *system);
> +
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  extern unsigned long ftrace_update_tot_cnt;
>  extern unsigned long ftrace_number_of_pages;
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 4cb275316e51..107767afe0ab 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -1811,28 +1811,28 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
>  	return cnt;
>  }
>  
> -static ssize_t
> -system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
> -		   loff_t *ppos)
> +/*
> + * Returns:
> + *   0 : no events exist?
> + *   1 : all events are disabled
> + *   2 : all events are enabled
> + *   3 : some events are enabled and some are enabled
> + */
> +int trace_events_enabled(struct trace_array *tr, const char *system)
>  {
> -	const char set_to_char[4] = { '?', '0', '1', 'X' };
> -	struct trace_subsystem_dir *dir = filp->private_data;
> -	struct event_subsystem *system = dir->subsystem;
>  	struct trace_event_call *call;
>  	struct trace_event_file *file;
> -	struct trace_array *tr = dir->tr;
> -	char buf[2];
>  	int set = 0;
> -	int ret;
>  
> -	mutex_lock(&event_mutex);
> +	guard(mutex)(&event_mutex);
> +
>  	list_for_each_entry(file, &tr->events, list) {
>  		call = file->event_call;
>  		if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
>  		    !trace_event_name(call) || !call->class || !call->class->reg)
>  			continue;
>  
> -		if (system && strcmp(call->class->system, system->name) != 0)
> +		if (system && strcmp(call->class->system, system) != 0)
>  			continue;
>  
>  		/*
> @@ -1848,7 +1848,23 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
>  		if (set == 3)
>  			break;
>  	}
> -	mutex_unlock(&event_mutex);
> +
> +	return set;
> +}
> +
> +static ssize_t
> +system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
> +		   loff_t *ppos)
> +{
> +	const char set_to_char[4] = { '?', '0', '1', 'X' };
> +	struct trace_subsystem_dir *dir = filp->private_data;
> +	struct event_subsystem *system = dir->subsystem;
> +	struct trace_array *tr = dir->tr;
> +	char buf[2];
> +	int set;
> +	int ret;
> +
> +	set = trace_events_enabled(tr, system ? system->name : NULL);
>  
>  	buf[0] = set_to_char[set];
>  	buf[1] = '\n';
> -- 
> 2.45.2
> 
> 
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>
Re: [PATCH 8/8] tracing: Update modules to persistent instances when loaded
Posted by Steven Rostedt 10 months, 1 week ago
On Thu, 6 Feb 2025 19:01:24 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> > +static void trace_module_record(struct module *mod)
> > +{
> > +	struct trace_array *tr;
> > +
> > +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> > +		/* Update any persistent trace array that has already been started */
> > +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> > +		    TRACE_ARRAY_FL_BOOT) {
> > +			/* Only update if the trace array is active */
> > +			if (trace_array_active(tr))  
> 
> Do we really need this check? It seems that we can just save_mod() if the
> above condition is true.

It gets a little more  complicated if we need to add and remove modules.

> 
> > +				save_mod(mod, tr);
> > +		}
> > +	}
> > +}
> > +
> >  static int trace_module_notify(struct notifier_block *self,
> >  			       unsigned long val, void *data)
> >  {
> > @@ -10096,6 +10120,7 @@ static int trace_module_notify(struct notifier_block *self,
> >  	switch (val) {
> >  	case MODULE_STATE_COMING:
> >  		trace_module_add_evals(mod);
> > +		trace_module_record(mod);
> >  		break;
> >  	case MODULE_STATE_GOING:
> >  		trace_module_remove_evals(mod);  
> 
> Don't we need to remove the module entry when a module is removed?
> (everytime we remove a module, trace data is cleared?)

I do have a patch that that removes entries, but I decided we don't really
want to do that.

If we want to have events for modules that were removed. Note, the ring
buffer is cleared if any module event was ever enabled and then the module
is removed, as how to print it is removed too. But we could disable that
for the persistent ring buffers as they should not be using the default
trace event print format anyway.

As for stack traces, we still want the module it was for when the stack
trace happens. A common bug we see is when a module is removed, it can
cause other bugs. We want to know about modules that were removed. Keeping
that information about removed modules will allow us to see what functions
were called by a stack trace for a module that was removed.

-- Steve
Re: [PATCH 8/8] tracing: Update modules to persistent instances when loaded
Posted by Masami Hiramatsu (Google) 10 months, 1 week ago
On Thu, 6 Feb 2025 10:36:12 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Thu, 6 Feb 2025 19:01:24 +0900
> Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> 
> > > +static void trace_module_record(struct module *mod)
> > > +{
> > > +	struct trace_array *tr;
> > > +
> > > +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> > > +		/* Update any persistent trace array that has already been started */
> > > +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> > > +		    TRACE_ARRAY_FL_BOOT) {
> > > +			/* Only update if the trace array is active */
> > > +			if (trace_array_active(tr))  
> > 
> > Do we really need this check? It seems that we can just save_mod() if the
> > above condition is true.
> 
> It gets a little more  complicated if we need to add and remove modules.

Yeah, but for converting the module address, we don't want to see other
module information.

> 
> > 
> > > +				save_mod(mod, tr);
> > > +		}
> > > +	}
> > > +}
> > > +
> > >  static int trace_module_notify(struct notifier_block *self,
> > >  			       unsigned long val, void *data)
> > >  {
> > > @@ -10096,6 +10120,7 @@ static int trace_module_notify(struct notifier_block *self,
> > >  	switch (val) {
> > >  	case MODULE_STATE_COMING:
> > >  		trace_module_add_evals(mod);
> > > +		trace_module_record(mod);
> > >  		break;
> > >  	case MODULE_STATE_GOING:
> > >  		trace_module_remove_evals(mod);  
> > 
> > Don't we need to remove the module entry when a module is removed?
> > (everytime we remove a module, trace data is cleared?)
> 
> I do have a patch that that removes entries, but I decided we don't really
> want to do that.
> 
> If we want to have events for modules that were removed. Note, the ring
> buffer is cleared if any module event was ever enabled and then the module
> is removed, as how to print it is removed too. But we could disable that
> for the persistent ring buffers as they should not be using the default
> trace event print format anyway.

Yeah, if the event is on the module the buffer is cleared.
But the module address can be in the stacktrace. In that case, the event
in the module is not enabled, but other events like sched_switch can
take stacktrace which can include the module address. In that case, the
buffer is also cleared when the module is removed?

> As for stack traces, we still want the module it was for when the stack
> trace happens. A common bug we see is when a module is removed, it can
> cause other bugs. We want to know about modules that were removed. Keeping
> that information about removed modules will allow us to see what functions
> were called by a stack trace for a module that was removed.

Hmm, but that should be covered by module load/unload events?

Anyway, this series does not cover the module text address in the stacktrace.
I just made a series of patches (which also not cover the module removal yet),
but it can show the basic idea.

My idea is to sort the previous module entries in the persistent buffer
when it is setup. We also make a "module_delta" array in the trace_array.
Then the print function can searche the appropriate module info from
the sorted table and find corresponding delta from "module_delta".

For example,

/sys/kernel/tracing/instances/boot_mapped # cat trace
           <...>-1629    [006] .....   202.860051: foo_bar_with_fn: foo Look at me 4
           <...>-1629    [006] .....   202.860059: <stack trace>
 => trace_event_raw_event_foo_bar_with_fn
 => simple_thread_fn
 => kthread
 => ret_from_fork
 => ret_from_fork_asm
/sys/kernel/tracing/instances/boot_mapped # cat last_boot_info 
Offset: 0
ffffffffa0016000 trace_events_sample
ffffffffa0025000 trace_printk
/sys/kernel/tracing/instances/boot_mapped # lsmod 

trace_events_sample 45056 0 - Live 0xffffffffa001c000
trace_printk 12288 0 - Live 0xffffffffa0016000

Let me share it.

Thank you,

-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>
Re: [PATCH 8/8] tracing: Update modules to persistent instances when loaded
Posted by Steven Rostedt 10 months, 1 week ago
On Fri, 7 Feb 2025 01:53:30 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> On Thu, 6 Feb 2025 10:36:12 -0500
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > On Thu, 6 Feb 2025 19:01:24 +0900
> > Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> >   
> > > > +static void trace_module_record(struct module *mod)
> > > > +{
> > > > +	struct trace_array *tr;
> > > > +
> > > > +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> > > > +		/* Update any persistent trace array that has already been started */
> > > > +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> > > > +		    TRACE_ARRAY_FL_BOOT) {
> > > > +			/* Only update if the trace array is active */
> > > > +			if (trace_array_active(tr))    
> > > 
> > > Do we really need this check? It seems that we can just save_mod() if the
> > > above condition is true.  
> > 
> > It gets a little more  complicated if we need to add and remove modules.  
> 
> Yeah, but for converting the module address, we don't want to see other
> module information.

But we want to see the module information for modules that were loaded
during the trace. If a module is removed and suddenly the system crashed,
we just lost that information. Hence why I reset the module information
when the tracing starts.


> > If we want to have events for modules that were removed. Note, the ring
> > buffer is cleared if any module event was ever enabled and then the module
> > is removed, as how to print it is removed too. But we could disable that
> > for the persistent ring buffers as they should not be using the default
> > trace event print format anyway.  
> 
> Yeah, if the event is on the module the buffer is cleared.
> But the module address can be in the stacktrace. In that case, the event
> in the module is not enabled, but other events like sched_switch can
> take stacktrace which can include the module address. In that case, the
> buffer is also cleared when the module is removed?

No. The buffer is only cleared if one of its events were ever enabled. If
no event within the module was enabled, then the buffer is not cleared.

> 
> > As for stack traces, we still want the module it was for when the stack
> > trace happens. A common bug we see is when a module is removed, it can
> > cause other bugs. We want to know about modules that were removed. Keeping
> > that information about removed modules will allow us to see what functions
> > were called by a stack trace for a module that was removed.  
> 
> Hmm, but that should be covered by module load/unload events?

If we have them enabled.

-- Steve
Re: [PATCH 8/8] tracing: Update modules to persistent instances when loaded
Posted by Masami Hiramatsu (Google) 10 months, 1 week ago
On Thu, 6 Feb 2025 12:18:20 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 7 Feb 2025 01:53:30 +0900
> Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> 
> > On Thu, 6 Feb 2025 10:36:12 -0500
> > Steven Rostedt <rostedt@goodmis.org> wrote:
> > 
> > > On Thu, 6 Feb 2025 19:01:24 +0900
> > > Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> > >   
> > > > > +static void trace_module_record(struct module *mod)
> > > > > +{
> > > > > +	struct trace_array *tr;
> > > > > +
> > > > > +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> > > > > +		/* Update any persistent trace array that has already been started */
> > > > > +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> > > > > +		    TRACE_ARRAY_FL_BOOT) {
> > > > > +			/* Only update if the trace array is active */
> > > > > +			if (trace_array_active(tr))    
> > > > 
> > > > Do we really need this check? It seems that we can just save_mod() if the
> > > > above condition is true.  
> > > 
> > > It gets a little more  complicated if we need to add and remove modules.  
> > 
> > Yeah, but for converting the module address, we don't want to see other
> > module information.
> 
> But we want to see the module information for modules that were loaded
> during the trace. If a module is removed and suddenly the system crashed,
> we just lost that information. Hence why I reset the module information
> when the tracing starts.

Then, what about removing module info if the address is recycled for
new module? We can keep it until the same address range is used by other
module(s).

> 
> 
> > > If we want to have events for modules that were removed. Note, the ring
> > > buffer is cleared if any module event was ever enabled and then the module
> > > is removed, as how to print it is removed too. But we could disable that
> > > for the persistent ring buffers as they should not be using the default
> > > trace event print format anyway.  
> > 
> > Yeah, if the event is on the module the buffer is cleared.
> > But the module address can be in the stacktrace. In that case, the event
> > in the module is not enabled, but other events like sched_switch can
> > take stacktrace which can include the module address. In that case, the
> > buffer is also cleared when the module is removed?
> 
> No. The buffer is only cleared if one of its events were ever enabled. If
> no event within the module was enabled, then the buffer is not cleared.
> 
> > 
> > > As for stack traces, we still want the module it was for when the stack
> > > trace happens. A common bug we see is when a module is removed, it can
> > > cause other bugs. We want to know about modules that were removed. Keeping
> > > that information about removed modules will allow us to see what functions
> > > were called by a stack trace for a module that was removed.  
> > 
> > Hmm, but that should be covered by module load/unload events?
> 
> If we have them enabled.

Yes, and current module load event does not cover the address. I think
we can add a new event to log it.

Thank you,

> 
> -- Steve


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>
[PATCH 1/3] tracing: Skip update_last_data() if it is already updated
Posted by Masami Hiramatsu (Google) 10 months, 1 week ago
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

If the last boot data is already cleared, there is no reason to update it
again. Skip if the TRACE_ARRAY_FL_LAST_BOOT is cleared.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/trace.c |   10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 443f2bc5b856..0f010a34de84 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6041,6 +6041,12 @@ static void update_last_data(struct trace_array *tr)
 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
 		return;
 
+	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
+		return;
+
+	/* Only if the buffer has previous boot data clear and update it. */
+	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
+
 	/* Reset the module list and reload them */
 	if (tr->scratch) {
 		struct trace_scratch *tscratch = tr->scratch;
@@ -6052,9 +6058,6 @@ static void update_last_data(struct trace_array *tr)
 		module_for_each_mod(save_mod, tr);
 	}
 
-	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
-		return;
-
 	/*
 	 * Need to clear all CPU buffers as there cannot be events
 	 * from the previous boot mixed with events with this boot
@@ -6077,7 +6080,6 @@ static void update_last_data(struct trace_array *tr)
 #else
 	tscratch->kaslr_addr = 0;
 #endif
-	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
 }
 
 /**
[PATCH 2/3] tracing: Remove checking the activity when module map is updating
Posted by Masami Hiramatsu (Google) 10 months, 1 week ago
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Remove unnecessary active check because tr->flags already checks it.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/trace.c |   13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0f010a34de84..5a064e712fd7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10090,15 +10090,6 @@ static void trace_module_remove_evals(struct module *mod)
 static inline void trace_module_remove_evals(struct module *mod) { }
 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 
-static bool trace_array_active(struct trace_array *tr)
-{
-	if (tr->current_trace != &nop_trace)
-		return true;
-
-	/* 0 is no events, 1 is all disabled */
-	return trace_events_enabled(tr, NULL) > 1;
-}
-
 static void trace_module_record(struct module *mod)
 {
 	struct trace_array *tr;
@@ -10107,9 +10098,7 @@ static void trace_module_record(struct module *mod)
 		/* Update any persistent trace array that has already been started */
 		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
 		    TRACE_ARRAY_FL_BOOT) {
-			/* Only update if the trace array is active */
-			if (trace_array_active(tr))
-				save_mod(mod, tr);
+			save_mod(mod, tr);
 		}
 	}
 }
Re: [PATCH 2/3] tracing: Remove checking the activity when module map is updating
Posted by Steven Rostedt 9 months, 1 week ago
On Fri,  7 Feb 2025 01:58:56 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:

> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> 
> Remove unnecessary active check because tr->flags already checks it.

I've thought this over, and sure, if we start tracing on a persistent ring
buffer, then it can add all modules loaded from then on even if it it's not
tracing.

Can you merge patches 1 and 2, rebase it on ring-buffer/for-next and
resubmit this as one patch?

Thanks,

-- Steve
Re: [PATCH 2/3] tracing: Remove checking the activity when module map is updating
Posted by Masami Hiramatsu (Google) 9 months, 1 week ago
On Fri, 7 Mar 2025 10:21:37 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri,  7 Feb 2025 01:58:56 +0900
> "Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> 
> > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > 
> > Remove unnecessary active check because tr->flags already checks it.
> 
> I've thought this over, and sure, if we start tracing on a persistent ring
> buffer, then it can add all modules loaded from then on even if it it's not
> tracing.
> 
> Can you merge patches 1 and 2, rebase it on ring-buffer/for-next and
> resubmit this as one patch?

Oops, I've missed this message. OK, let me rebase it.

Thanks,

> 
> Thanks,
> 
> -- Steve


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>
[PATCH 3/3] tracing: Show last module text symbols in the stacktrace
Posted by Masami Hiramatsu (Google) 10 months, 1 week ago
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Since the previous boot trace buffer can include module text address in
the stacktrace. As same as the kernel text address, convert the module
text address using the module address information.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/trace.c        |   76 +++++++++++++++++++++++++++++++++++++++++--
 kernel/trace/trace.h        |    4 ++
 kernel/trace/trace_output.c |    3 +-
 3 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5a064e712fd7..9d942b7c2651 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -49,6 +49,7 @@
 #include <linux/fsnotify.h>
 #include <linux/irq_work.h>
 #include <linux/workqueue.h>
+#include <linux/sort.h>
 
 #include <asm/setup.h> /* COMMAND_LINE_SIZE and kaslr_offset() */
 
@@ -6007,6 +6008,27 @@ struct trace_scratch {
 
 static DEFINE_MUTEX(scratch_mutex);
 
+unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
+{
+	struct trace_scratch *tscratch;
+	int i;
+
+	/* If we don't have last boot delta, return the address */
+	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
+		return addr;
+
+	tscratch = tr->scratch;
+	if (tscratch && tr->module_delta && tscratch->entries[0].mod_addr < addr) {
+		/* Note that entries are sorted */
+		for (i = 0; i < tr->nr_modules; i++)
+			if (addr < tscratch->entries[i].mod_addr)
+				break;
+		return addr + tr->module_delta[i - 1];
+	}
+
+	return addr + tr->text_delta;
+}
+
 static int save_mod(struct module *mod, void *data)
 {
 	struct trace_array *tr = data;
@@ -6068,6 +6090,9 @@ static void update_last_data(struct trace_array *tr)
 
 	/* Using current data now */
 	tr->text_delta = 0;
+	kfree(tr->module_delta);
+	tr->module_delta = NULL;
+	tr->nr_modules = 0;
 
 	if (!tr->scratch)
 		return;
@@ -9351,10 +9376,37 @@ static struct dentry *trace_instance_dir;
 static void
 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
 
+static int make_mod_delta(struct module *mod, void *data)
+{
+	struct trace_scratch *tscratch;
+	struct trace_mod_entry *entry;
+	struct trace_array *tr = data;
+	int i;
+
+	tscratch = tr->scratch;
+	for (i = 0; i < tr->nr_modules; i++) {
+		entry = &tscratch->entries[i];
+		if (!strcmp(mod->name, entry->mod_name)) {
+			tr->module_delta[i] = (unsigned long)mod->mem[MOD_TEXT].base - entry->mod_addr;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int mod_addr_comp(const void *a, const void *b, const void *data)
+{
+	const struct trace_mod_entry *e1 = a;
+	const struct trace_mod_entry *e2 = b;
+
+	return e1->mod_addr - e2->mod_addr;
+}
+
 static void setup_trace_scratch(struct trace_array *tr, void *scratch, unsigned int size)
 {
 	struct trace_scratch *tscratch = scratch;
 	struct trace_mod_entry *entry;
+	int i, nr_entries;
 
 	if (!scratch)
 		return;
@@ -9371,7 +9423,7 @@ static void setup_trace_scratch(struct trace_array *tr, void *scratch, unsigned
 		goto reset;
 
 	/* Check if each module name is a valid string */
-	for (int i = 0; i < tscratch->nr_entries; i++) {
+	for (i = 0; i < tscratch->nr_entries; i++) {
 		int n;
 
 		entry = &tscratch->entries[i];
@@ -9385,6 +9437,20 @@ static void setup_trace_scratch(struct trace_array *tr, void *scratch, unsigned
 		if (n == MODULE_NAME_LEN)
 			goto reset;
 	}
+	nr_entries = i;
+	/* Allocate module delta array */
+	tr->module_delta = kcalloc(nr_entries, sizeof(long), GFP_KERNEL);
+	if (!tr->module_delta)
+		goto reset;
+	tr->nr_modules = nr_entries;
+
+	/* Sort module table by base address. */
+	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
+		mod_addr_comp, NULL, NULL);
+
+	/* Scan modules */
+	module_for_each_mod(make_mod_delta, tr);
+
 	return;
  reset:
 	/* Invalid trace modules */
@@ -10093,12 +10159,16 @@ static inline void trace_module_remove_evals(struct module *mod) { }
 static void trace_module_record(struct module *mod)
 {
 	struct trace_array *tr;
+	unsigned long flags;
 
 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
 		/* Update any persistent trace array that has already been started */
-		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
-		    TRACE_ARRAY_FL_BOOT) {
+		if (flags == TRACE_ARRAY_FL_BOOT) {
 			save_mod(mod, tr);
+		} else if (flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) {
+			/* Update delta if the module loaded in previous boot */
+			make_mod_delta(mod, tr);
 		}
 	}
 }
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 90493220c362..47c0742fe9ec 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -349,6 +349,8 @@ struct trace_array {
 	unsigned long		range_addr_start;
 	unsigned long		range_addr_size;
 	long			text_delta;
+	int			nr_modules;
+	long			*module_delta;
 	void			*scratch; /* pointer in persistent memory */
 	int			scratch_size;
 
@@ -465,6 +467,8 @@ extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
 
 extern bool trace_clock_in_ns(struct trace_array *tr);
 
+extern unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr);
+
 /*
  * The global tracer (top) should be the first trace array added,
  * but we check the flag anyway.
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 03d56f711ad1..a5336c4ece8e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1248,7 +1248,6 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 	struct trace_seq *s = &iter->seq;
 	unsigned long *p;
 	unsigned long *end;
-	long delta = iter->tr->text_delta;
 
 	trace_assign_type(field, iter->ent);
 	end = (unsigned long *)((long)iter->ent + iter->ent_size);
@@ -1265,7 +1264,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 			trace_seq_puts(s, "[FTRACE TRAMPOLINE]\n");
 			continue;
 		}
-		seq_print_ip_sym(s, (*p) + delta, flags);
+		seq_print_ip_sym(s, trace_adjust_address(iter->tr, *p), flags);
 		trace_seq_putc(s, '\n');
 	}
Re: [PATCH 3/3] tracing: Show last module text symbols in the stacktrace
Posted by Steven Rostedt 10 months, 1 week ago
On Fri,  7 Feb 2025 01:59:05 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:

> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> 
> Since the previous boot trace buffer can include module text address in
> the stacktrace. As same as the kernel text address, convert the module
> text address using the module address information.

Note, this doesn't look like it depends on the first two patches, which I
don't think we want yet. As that assumes we never disable the buffer once
we start it, and may start it again. But that's a debate we can have later.

> 
> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> ---
>  kernel/trace/trace.c        |   76 +++++++++++++++++++++++++++++++++++++++++--
>  kernel/trace/trace.h        |    4 ++
>  kernel/trace/trace_output.c |    3 +-
>  3 files changed, 78 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 5a064e712fd7..9d942b7c2651 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -49,6 +49,7 @@
>  #include <linux/fsnotify.h>
>  #include <linux/irq_work.h>
>  #include <linux/workqueue.h>
> +#include <linux/sort.h>
>  
>  #include <asm/setup.h> /* COMMAND_LINE_SIZE and kaslr_offset() */
>  
> @@ -6007,6 +6008,27 @@ struct trace_scratch {
>  
>  static DEFINE_MUTEX(scratch_mutex);
>  
> +unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
> +{
> +	struct trace_scratch *tscratch;
> +	int i;
> +
> +	/* If we don't have last boot delta, return the address */
> +	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
> +		return addr;
> +
> +	tscratch = tr->scratch;
> +	if (tscratch && tr->module_delta && tscratch->entries[0].mod_addr < addr) {
> +		/* Note that entries are sorted */
> +		for (i = 0; i < tr->nr_modules; i++)
> +			if (addr < tscratch->entries[i].mod_addr)
> +				break;
> +		return addr + tr->module_delta[i - 1];
> +	}
> +
> +	return addr + tr->text_delta;
> +}
> +
>  static int save_mod(struct module *mod, void *data)
>  {
>  	struct trace_array *tr = data;
> @@ -6068,6 +6090,9 @@ static void update_last_data(struct trace_array *tr)
>  
>  	/* Using current data now */
>  	tr->text_delta = 0;
> +	kfree(tr->module_delta);
> +	tr->module_delta = NULL;
> +	tr->nr_modules = 0;
>  
>  	if (!tr->scratch)
>  		return;

The above could probably go after the check for !tr->scratch.

> @@ -9351,10 +9376,37 @@ static struct dentry *trace_instance_dir;
>  static void
>  init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
>  
> +static int make_mod_delta(struct module *mod, void *data)
> +{
> +	struct trace_scratch *tscratch;
> +	struct trace_mod_entry *entry;
> +	struct trace_array *tr = data;
> +	int i;
> +
> +	tscratch = tr->scratch;
> +	for (i = 0; i < tr->nr_modules; i++) {
> +		entry = &tscratch->entries[i];
> +		if (!strcmp(mod->name, entry->mod_name)) {
> +			tr->module_delta[i] = (unsigned long)mod->mem[MOD_TEXT].base - entry->mod_addr;
> +			return 1;

Returning 1 causes the module_loop to stop. That is, this will only process
the first module it finds and then no more after that.

> +		}
> +	}

Doesn't this assume that we have the same modules loaded?

Note, I do plan on adding another field in the trace_scratch structure that
has the uname, then only allow the ascii trace to use the current kallsyms
if the uname matches.

> +	return 0;
> +}
> +
> +static int mod_addr_comp(const void *a, const void *b, const void *data)
> +{
> +	const struct trace_mod_entry *e1 = a;
> +	const struct trace_mod_entry *e2 = b;
> +
> +	return e1->mod_addr - e2->mod_addr;

Hmm, could this possibly cause an overflow issue? If the two addresses are
more than 32 bits apart?

> +}
> +
>  static void setup_trace_scratch(struct trace_array *tr, void *scratch, unsigned int size)
>  {
>  	struct trace_scratch *tscratch = scratch;
>  	struct trace_mod_entry *entry;
> +	int i, nr_entries;
>  
>  	if (!scratch)
>  		return;
> @@ -9371,7 +9423,7 @@ static void setup_trace_scratch(struct trace_array *tr, void *scratch, unsigned
>  		goto reset;
>  
>  	/* Check if each module name is a valid string */
> -	for (int i = 0; i < tscratch->nr_entries; i++) {
> +	for (i = 0; i < tscratch->nr_entries; i++) {
>  		int n;
>  
>  		entry = &tscratch->entries[i];
> @@ -9385,6 +9437,20 @@ static void setup_trace_scratch(struct trace_array *tr, void *scratch, unsigned
>  		if (n == MODULE_NAME_LEN)
>  			goto reset;
>  	}
> +	nr_entries = i;
> +	/* Allocate module delta array */
> +	tr->module_delta = kcalloc(nr_entries, sizeof(long), GFP_KERNEL);
> +	if (!tr->module_delta)

I wonder if this should trigger a WARN_ON()?

> +		goto reset;
> +	tr->nr_modules = nr_entries;
> +
> +	/* Sort module table by base address. */
> +	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
> +		mod_addr_comp, NULL, NULL);
> +
> +	/* Scan modules */
> +	module_for_each_mod(make_mod_delta, tr);
> +
>  	return;
>   reset:
>  	/* Invalid trace modules */


BTW, here's my removal patch:

-- Steve


From 521bc0e5286c93eb4be981d22a6c05043a78b555 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 5 Feb 2025 11:55:51 -0500
Subject: [PATCH] tracing: Update modules to persistent instances when removed

When a module is removed, remove it from the list of saved modules in the
persistent memory to any persistent instance that is currently active.

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 69 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 443f2bc5b856..13eeb8df8b8c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6001,6 +6001,7 @@ struct trace_mod_entry {
 
 struct trace_scratch {
 	unsigned long		kaslr_addr;
+	unsigned long		first_free_slot;
 	unsigned long		nr_entries;
 	struct trace_mod_entry	entries[];
 };
@@ -6012,6 +6013,7 @@ static int save_mod(struct module *mod, void *data)
 	struct trace_array *tr = data;
 	struct trace_scratch *tscratch;
 	struct trace_mod_entry *entry;
+	unsigned int idx;
 	unsigned int size;
 
 	guard(mutex)(&scratch_mutex);
@@ -6021,16 +6023,60 @@ static int save_mod(struct module *mod, void *data)
 		return -1;
 	size = tr->scratch_size;
 
-	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
-		return -1;
+	idx = tscratch->first_free_slot < tscratch->nr_entries ?
+		tscratch->first_free_slot : tscratch->nr_entries;
 
-	entry = &tscratch->entries[tscratch->nr_entries];
+	if (struct_size(tscratch, entries, idx + 1) > size)
+		return -1;
 
-	tscratch->nr_entries++;
+	entry = &tscratch->entries[idx];
 
 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
 	strscpy(entry->mod_name, mod->name);
 
+	if (idx == tscratch->nr_entries)
+		tscratch->nr_entries++;
+
+	for (idx++; idx < tscratch->nr_entries; idx++) {
+		entry = &tscratch->entries[idx];
+		if (!entry->mod_addr)
+			break;
+	}
+
+	tscratch->first_free_slot = idx;
+
+	return 0;
+}
+
+static int remove_mod(struct module *mod, void *data)
+{
+	struct trace_array *tr = data;
+	struct trace_scratch *tscratch;
+	struct trace_mod_entry *entry;
+	unsigned int idx;
+	unsigned int size;
+
+	guard(mutex)(&scratch_mutex);
+
+	tscratch = tr->scratch;
+	if (!tscratch)
+		return -1;
+	size = tr->scratch_size;
+
+	for (idx = 0; idx < tscratch->nr_entries; idx++) {
+		entry = &tscratch->entries[idx];
+		if (entry->mod_addr == (unsigned long)mod->mem[MOD_TEXT].base)
+			break;
+	}
+	if (idx == tscratch->nr_entries)
+		return -1;
+
+	if (idx < tscratch->first_free_slot)
+		tscratch->first_free_slot = idx;
+
+	entry->mod_addr = 0;
+	entry->mod_name[0] = '\0';
+
 	return 0;
 }
 
@@ -10112,6 +10158,20 @@ static void trace_module_record(struct module *mod)
 	}
 }
 
+static void trace_module_delete(struct module *mod)
+{
+	struct trace_array *tr;
+
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		/* Update any persistent trace array that has already been started */
+		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
+		    TRACE_ARRAY_FL_BOOT) {
+			if (trace_array_active(tr))
+				remove_mod(mod, tr);
+		}
+	}
+}
+
 static int trace_module_notify(struct notifier_block *self,
 			       unsigned long val, void *data)
 {
@@ -10124,6 +10184,7 @@ static int trace_module_notify(struct notifier_block *self,
 		break;
 	case MODULE_STATE_GOING:
 		trace_module_remove_evals(mod);
+		trace_module_delete(mod);
 		break;
 	}
 
-- 
2.45.2
Re: [PATCH 3/3] tracing: Show last module text symbols in the stacktrace
Posted by Masami Hiramatsu (Google) 10 months, 1 week ago
On Thu, 6 Feb 2025 12:46:31 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri,  7 Feb 2025 01:59:05 +0900
> "Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> 
> > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > 
> > Since the previous boot trace buffer can include module text address in
> > the stacktrace. As same as the kernel text address, convert the module
> > text address using the module address information.
> 
> Note, this doesn't look like it depends on the first two patches, which I
> don't think we want yet. As that assumes we never disable the buffer once
> we start it, and may start it again. But that's a debate we can have later.

Yes, the first 2 patches are kind of optimization.

> 
> > 
> > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > ---
> >  kernel/trace/trace.c        |   76 +++++++++++++++++++++++++++++++++++++++++--
> >  kernel/trace/trace.h        |    4 ++
> >  kernel/trace/trace_output.c |    3 +-
> >  3 files changed, 78 insertions(+), 5 deletions(-)
> > 
> > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> > index 5a064e712fd7..9d942b7c2651 100644
> > --- a/kernel/trace/trace.c
> > +++ b/kernel/trace/trace.c
> > @@ -49,6 +49,7 @@
> >  #include <linux/fsnotify.h>
> >  #include <linux/irq_work.h>
> >  #include <linux/workqueue.h>
> > +#include <linux/sort.h>
> >  
> >  #include <asm/setup.h> /* COMMAND_LINE_SIZE and kaslr_offset() */
> >  
> > @@ -6007,6 +6008,27 @@ struct trace_scratch {
> >  
> >  static DEFINE_MUTEX(scratch_mutex);
> >  
> > +unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
> > +{
> > +	struct trace_scratch *tscratch;
> > +	int i;
> > +
> > +	/* If we don't have last boot delta, return the address */
> > +	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
> > +		return addr;
> > +
> > +	tscratch = tr->scratch;
> > +	if (tscratch && tr->module_delta && tscratch->entries[0].mod_addr < addr) {
> > +		/* Note that entries are sorted */
> > +		for (i = 0; i < tr->nr_modules; i++)
> > +			if (addr < tscratch->entries[i].mod_addr)
> > +				break;
> > +		return addr + tr->module_delta[i - 1];
> > +	}
> > +
> > +	return addr + tr->text_delta;
> > +}
> > +
> >  static int save_mod(struct module *mod, void *data)
> >  {
> >  	struct trace_array *tr = data;
> > @@ -6068,6 +6090,9 @@ static void update_last_data(struct trace_array *tr)
> >  
> >  	/* Using current data now */
> >  	tr->text_delta = 0;
> > +	kfree(tr->module_delta);
> > +	tr->module_delta = NULL;
> > +	tr->nr_modules = 0;
> >  
> >  	if (!tr->scratch)
> >  		return;
> 
> The above could probably go after the check for !tr->scratch.

Indeed.

So TRACE_ARRAY_FL_LAST_BOOT means the buffer is on a reserved memory and it
still have the previous boot data. OTOH, `tr->scratch` means the buffer has
scratch pad area. And currently last_boot_info related fields are used
only if there is a scratch pad area. Currently the reserved memory based
buffer must have the scratch pad, but you might expect there could be
the previous boot data without scratch pad area in the future?

> 
> > @@ -9351,10 +9376,37 @@ static struct dentry *trace_instance_dir;
> >  static void
> >  init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
> >  
> > +static int make_mod_delta(struct module *mod, void *data)
> > +{
> > +	struct trace_scratch *tscratch;
> > +	struct trace_mod_entry *entry;
> > +	struct trace_array *tr = data;
> > +	int i;
> > +
> > +	tscratch = tr->scratch;
> > +	for (i = 0; i < tr->nr_modules; i++) {
> > +		entry = &tscratch->entries[i];
> > +		if (!strcmp(mod->name, entry->mod_name)) {
> > +			tr->module_delta[i] = (unsigned long)mod->mem[MOD_TEXT].base - entry->mod_addr;
> > +			return 1;
> 
> Returning 1 causes the module_loop to stop. That is, this will only process
> the first module it finds and then no more after that.

Ah, good catch! I must have been half asleep. :(

> 
> > +		}
> > +	}
> 
> Doesn't this assume that we have the same modules loaded?

Yes, I thought that was your idea to show the symbols if the same module
is loaded. We can use 

> 
> Note, I do plan on adding another field in the trace_scratch structure that
> has the uname, then only allow the ascii trace to use the current kallsyms
> if the uname matches.

Yeah, it is OK. I thought the build-id is useful, but uname is also good.

> 
> > +	return 0;
> > +}
> > +
> > +static int mod_addr_comp(const void *a, const void *b, const void *data)
> > +{
> > +	const struct trace_mod_entry *e1 = a;
> > +	const struct trace_mod_entry *e2 = b;
> > +
> > +	return e1->mod_addr - e2->mod_addr;
> 
> Hmm, could this possibly cause an overflow issue? If the two addresses are
> more than 32 bits apart?

I guess the modules are loaded in the same area (mostly in vmalloc) and
maybe the area size is under 2GB. Anyway, we can shift down it by page size.

> 
> > +}
> > +
> >  static void setup_trace_scratch(struct trace_array *tr, void *scratch, unsigned int size)
> >  {
> >  	struct trace_scratch *tscratch = scratch;
> >  	struct trace_mod_entry *entry;
> > +	int i, nr_entries;
> >  
> >  	if (!scratch)
> >  		return;
> > @@ -9371,7 +9423,7 @@ static void setup_trace_scratch(struct trace_array *tr, void *scratch, unsigned
> >  		goto reset;
> >  
> >  	/* Check if each module name is a valid string */
> > -	for (int i = 0; i < tscratch->nr_entries; i++) {
> > +	for (i = 0; i < tscratch->nr_entries; i++) {
> >  		int n;
> >  
> >  		entry = &tscratch->entries[i];
> > @@ -9385,6 +9437,20 @@ static void setup_trace_scratch(struct trace_array *tr, void *scratch, unsigned
> >  		if (n == MODULE_NAME_LEN)
> >  			goto reset;
> >  	}
> > +	nr_entries = i;
> > +	/* Allocate module delta array */
> > +	tr->module_delta = kcalloc(nr_entries, sizeof(long), GFP_KERNEL);
> > +	if (!tr->module_delta)
> 
> I wonder if this should trigger a WARN_ON()?

I think this module_delta is optionally decode the previous boot data.
So maybe pr_info() is enough.

> 
> > +		goto reset;
> > +	tr->nr_modules = nr_entries;
> > +
> > +	/* Sort module table by base address. */
> > +	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
> > +		mod_addr_comp, NULL, NULL);
> > +
> > +	/* Scan modules */
> > +	module_for_each_mod(make_mod_delta, tr);
> > +
> >  	return;
> >   reset:
> >  	/* Invalid trace modules */
> 
> 
> BTW, here's my removal patch:

Thank you! and as I discussed above, I think we can postpone the module
removal until the next module is loaded on the same address.
(or, we can use the scratch pad as a log structured table, in this case
we can set a removal flag and replace the oldest removed one. Something
like LRU.)

Thanks,

> 
> -- Steve
> 
> 
> From 521bc0e5286c93eb4be981d22a6c05043a78b555 Mon Sep 17 00:00:00 2001
> From: Steven Rostedt <rostedt@goodmis.org>
> Date: Wed, 5 Feb 2025 11:55:51 -0500
> Subject: [PATCH] tracing: Update modules to persistent instances when removed
> 
> When a module is removed, remove it from the list of saved modules in the
> persistent memory to any persistent instance that is currently active.
> 
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---
>  kernel/trace/trace.c | 69 +++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 65 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 443f2bc5b856..13eeb8df8b8c 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -6001,6 +6001,7 @@ struct trace_mod_entry {
>  
>  struct trace_scratch {
>  	unsigned long		kaslr_addr;
> +	unsigned long		first_free_slot;
>  	unsigned long		nr_entries;
>  	struct trace_mod_entry	entries[];
>  };
> @@ -6012,6 +6013,7 @@ static int save_mod(struct module *mod, void *data)
>  	struct trace_array *tr = data;
>  	struct trace_scratch *tscratch;
>  	struct trace_mod_entry *entry;
> +	unsigned int idx;
>  	unsigned int size;
>  
>  	guard(mutex)(&scratch_mutex);
> @@ -6021,16 +6023,60 @@ static int save_mod(struct module *mod, void *data)
>  		return -1;
>  	size = tr->scratch_size;
>  
> -	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
> -		return -1;
> +	idx = tscratch->first_free_slot < tscratch->nr_entries ?
> +		tscratch->first_free_slot : tscratch->nr_entries;
>  
> -	entry = &tscratch->entries[tscratch->nr_entries];
> +	if (struct_size(tscratch, entries, idx + 1) > size)
> +		return -1;
>  
> -	tscratch->nr_entries++;
> +	entry = &tscratch->entries[idx];
>  
>  	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
>  	strscpy(entry->mod_name, mod->name);
>  
> +	if (idx == tscratch->nr_entries)
> +		tscratch->nr_entries++;
> +
> +	for (idx++; idx < tscratch->nr_entries; idx++) {
> +		entry = &tscratch->entries[idx];
> +		if (!entry->mod_addr)
> +			break;
> +	}
> +
> +	tscratch->first_free_slot = idx;
> +
> +	return 0;
> +}
> +
> +static int remove_mod(struct module *mod, void *data)
> +{
> +	struct trace_array *tr = data;
> +	struct trace_scratch *tscratch;
> +	struct trace_mod_entry *entry;
> +	unsigned int idx;
> +	unsigned int size;
> +
> +	guard(mutex)(&scratch_mutex);
> +
> +	tscratch = tr->scratch;
> +	if (!tscratch)
> +		return -1;
> +	size = tr->scratch_size;
> +
> +	for (idx = 0; idx < tscratch->nr_entries; idx++) {
> +		entry = &tscratch->entries[idx];
> +		if (entry->mod_addr == (unsigned long)mod->mem[MOD_TEXT].base)
> +			break;
> +	}
> +	if (idx == tscratch->nr_entries)
> +		return -1;
> +
> +	if (idx < tscratch->first_free_slot)
> +		tscratch->first_free_slot = idx;
> +
> +	entry->mod_addr = 0;
> +	entry->mod_name[0] = '\0';
> +
>  	return 0;
>  }
>  
> @@ -10112,6 +10158,20 @@ static void trace_module_record(struct module *mod)
>  	}
>  }
>  
> +static void trace_module_delete(struct module *mod)
> +{
> +	struct trace_array *tr;
> +
> +	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
> +		/* Update any persistent trace array that has already been started */
> +		if ((tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT)) ==
> +		    TRACE_ARRAY_FL_BOOT) {
> +			if (trace_array_active(tr))
> +				remove_mod(mod, tr);
> +		}
> +	}
> +}
> +
>  static int trace_module_notify(struct notifier_block *self,
>  			       unsigned long val, void *data)
>  {
> @@ -10124,6 +10184,7 @@ static int trace_module_notify(struct notifier_block *self,
>  		break;
>  	case MODULE_STATE_GOING:
>  		trace_module_remove_evals(mod);
> +		trace_module_delete(mod);
>  		break;
>  	}
>  
> -- 
> 2.45.2
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>