[PATCH v3] tracing: Record trace_clock and recover when reboot

Masami Hiramatsu (Google) posted 1 patch 9 months ago
There is a newer version of this series
kernel/trace/trace.c |   16 ++++++++++++++++
1 file changed, 16 insertions(+)
[PATCH v3] tracing: Record trace_clock and recover when reboot
Posted by Masami Hiramatsu (Google) 9 months ago
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Record trace_clock information in the trace_scratch area and recover
the trace_clock when boot, so that reader can docode the timestamp
correctly.
Note that since most trace_clocks records the timestamp in nano-
seconds, this is not a bug. But some trace_clock, like counter and
tsc will record the counter value. Only for those trace_clock user
needs this information.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v3:
   - Save clock_id instead of its name.
 Changes in v2:
   - instead of exposing it via last_boot_info, set the current
     trace_clock as the same clock we used in the last boot.
---
 kernel/trace/trace.c |   16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index cf51c30b137f..2a060c62d686 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6066,6 +6066,7 @@ struct trace_mod_entry {
 };
 
 struct trace_scratch {
+	int			clock_id;
 	unsigned long		text_addr;
 	unsigned long		nr_entries;
 	struct trace_mod_entry	entries[];
@@ -6181,6 +6182,7 @@ static void update_last_data(struct trace_array *tr)
 	if (tr->scratch) {
 		struct trace_scratch *tscratch = tr->scratch;
 
+		tscratch->clock_id = tr->clock_id;
 		memset(tscratch->entries, 0,
 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
 		tscratch->nr_entries = 0;
@@ -7403,6 +7405,12 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr)
 	tracing_reset_online_cpus(&tr->max_buffer);
 #endif
 
+	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
+		struct trace_scratch *tscratch = tr->scratch;
+
+		tscratch->clock_id = i;
+	}
+
 	mutex_unlock(&trace_types_lock);
 
 	return 0;
@@ -9628,6 +9636,14 @@ static void setup_trace_scratch(struct trace_array *tr,
 
 	/* Scan modules to make text delta for modules. */
 	module_for_each_mod(make_mod_delta, tr);
+
+	/* Set trace_clock as the same of the previous boot. */
+	if (tscratch->clock_id != tr->clock_id) {
+		if (tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
+			pr_info("the previous trace_clock info is not valid.");
+			goto reset;
+		}
+	}
 	return;
  reset:
 	/* Invalid trace modules */
Re: [PATCH v3] tracing: Record trace_clock and recover when reboot
Posted by Steven Rostedt 9 months ago
On Tue, 13 May 2025 09:31:27 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:

> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> 
> Record trace_clock information in the trace_scratch area and recover
> the trace_clock when boot, so that reader can docode the timestamp
> correctly.
> Note that since most trace_clocks records the timestamp in nano-
> seconds, this is not a bug. But some trace_clock, like counter and
> tsc will record the counter value. Only for those trace_clock user
> needs this information.
> 
> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> ---
>  Changes in v3:
>    - Save clock_id instead of its name.
>  Changes in v2:
>    - instead of exposing it via last_boot_info, set the current
>      trace_clock as the same clock we used in the last boot.
> ---
>  kernel/trace/trace.c |   16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index cf51c30b137f..2a060c62d686 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -6066,6 +6066,7 @@ struct trace_mod_entry {
>  };
>  
>  struct trace_scratch {
> +	int			clock_id;

Should be "unsigned int"

>  	unsigned long		text_addr;
>  	unsigned long		nr_entries;
>  	struct trace_mod_entry	entries[];
> @@ -6181,6 +6182,7 @@ static void update_last_data(struct trace_array *tr)
>  	if (tr->scratch) {
>  		struct trace_scratch *tscratch = tr->scratch;
>  
> +		tscratch->clock_id = tr->clock_id;
>  		memset(tscratch->entries, 0,
>  		       flex_array_size(tscratch, entries, tscratch->nr_entries));
>  		tscratch->nr_entries = 0;
> @@ -7403,6 +7405,12 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr)
>  	tracing_reset_online_cpus(&tr->max_buffer);
>  #endif
>  
> +	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
> +		struct trace_scratch *tscratch = tr->scratch;
> +
> +		tscratch->clock_id = i;
> +	}
> +
>  	mutex_unlock(&trace_types_lock);
>  
>  	return 0;
> @@ -9628,6 +9636,14 @@ static void setup_trace_scratch(struct trace_array *tr,
>  
>  	/* Scan modules to make text delta for modules. */
>  	module_for_each_mod(make_mod_delta, tr);
> +
> +	/* Set trace_clock as the same of the previous boot. */
> +	if (tscratch->clock_id != tr->clock_id) {
> +		if (tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {

In case the tscratch->clock_id gets corrupted, we better make sure it
doesn't overflow:

		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {

-- Steve


> +			pr_info("the previous trace_clock info is not valid.");
> +			goto reset;
> +		}
> +	}
>  	return;
>   reset:
>  	/* Invalid trace modules */
Re: [PATCH v3] tracing: Record trace_clock and recover when reboot
Posted by Masami Hiramatsu (Google) 9 months ago
On Tue, 13 May 2025 13:56:51 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Tue, 13 May 2025 09:31:27 +0900
> "Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> 
> > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > 
> > Record trace_clock information in the trace_scratch area and recover
> > the trace_clock when boot, so that reader can docode the timestamp
> > correctly.
> > Note that since most trace_clocks records the timestamp in nano-
> > seconds, this is not a bug. But some trace_clock, like counter and
> > tsc will record the counter value. Only for those trace_clock user
> > needs this information.
> > 
> > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > ---
> >  Changes in v3:
> >    - Save clock_id instead of its name.
> >  Changes in v2:
> >    - instead of exposing it via last_boot_info, set the current
> >      trace_clock as the same clock we used in the last boot.
> > ---
> >  kernel/trace/trace.c |   16 ++++++++++++++++
> >  1 file changed, 16 insertions(+)
> > 
> > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> > index cf51c30b137f..2a060c62d686 100644
> > --- a/kernel/trace/trace.c
> > +++ b/kernel/trace/trace.c
> > @@ -6066,6 +6066,7 @@ struct trace_mod_entry {
> >  };
> >  
> >  struct trace_scratch {
> > +	int			clock_id;
> 
> Should be "unsigned int"

OK.

> 
> >  	unsigned long		text_addr;
> >  	unsigned long		nr_entries;
> >  	struct trace_mod_entry	entries[];
> > @@ -6181,6 +6182,7 @@ static void update_last_data(struct trace_array *tr)
> >  	if (tr->scratch) {
> >  		struct trace_scratch *tscratch = tr->scratch;
> >  
> > +		tscratch->clock_id = tr->clock_id;
> >  		memset(tscratch->entries, 0,
> >  		       flex_array_size(tscratch, entries, tscratch->nr_entries));
> >  		tscratch->nr_entries = 0;
> > @@ -7403,6 +7405,12 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr)
> >  	tracing_reset_online_cpus(&tr->max_buffer);
> >  #endif
> >  
> > +	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
> > +		struct trace_scratch *tscratch = tr->scratch;
> > +
> > +		tscratch->clock_id = i;
> > +	}
> > +
> >  	mutex_unlock(&trace_types_lock);
> >  
> >  	return 0;
> > @@ -9628,6 +9636,14 @@ static void setup_trace_scratch(struct trace_array *tr,
> >  
> >  	/* Scan modules to make text delta for modules. */
> >  	module_for_each_mod(make_mod_delta, tr);
> > +
> > +	/* Set trace_clock as the same of the previous boot. */
> > +	if (tscratch->clock_id != tr->clock_id) {
> > +		if (tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
> 
> In case the tscratch->clock_id gets corrupted, we better make sure it
> doesn't overflow:
> 
> 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
> 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {

Good catch! BTW, don't we have any check about scratch area?
(e.g. adding a checksum)

Thank you,

> 
> -- Steve
> 
> 
> > +			pr_info("the previous trace_clock info is not valid.");
> > +			goto reset;
> > +		}
> > +	}
> >  	return;
> >   reset:
> >  	/* Invalid trace modules */
> 
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>
Re: [PATCH v3] tracing: Record trace_clock and recover when reboot
Posted by Steven Rostedt 9 months ago
On Wed, 14 May 2025 10:14:31 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> > In case the tscratch->clock_id gets corrupted, we better make sure it
> > doesn't overflow:
> > 
> > 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
> > 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {  
> 
> Good catch! BTW, don't we have any check about scratch area?
> (e.g. adding a checksum)

We could, but it would require updating every time we update the
scratch area.

Currently, the scratch area is only considered valid if the ring buffer
is proven to be valid. But if this becomes a problem, we probably
should add a checksum or something.

-- Steve