kernel/trace/trace.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Record trace_clock information in the trace_scratch area and recover
the trace_clock when boot, so that reader can docode the timestamp
correctly.
Note that since most trace_clocks records the timestamp in nano-
seconds, this is not a bug. But some trace_clock, like counter and
tsc will record the counter value. Only for those trace_clock user
needs this information.
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v3:
- Save clock_id instead of its name.
Changes in v2:
- instead of exposing it via last_boot_info, set the current
trace_clock as the same clock we used in the last boot.
---
kernel/trace/trace.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index cf51c30b137f..2a060c62d686 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6066,6 +6066,7 @@ struct trace_mod_entry {
};
struct trace_scratch {
+ int clock_id;
unsigned long text_addr;
unsigned long nr_entries;
struct trace_mod_entry entries[];
@@ -6181,6 +6182,7 @@ static void update_last_data(struct trace_array *tr)
if (tr->scratch) {
struct trace_scratch *tscratch = tr->scratch;
+ tscratch->clock_id = tr->clock_id;
memset(tscratch->entries, 0,
flex_array_size(tscratch, entries, tscratch->nr_entries));
tscratch->nr_entries = 0;
@@ -7403,6 +7405,12 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr)
tracing_reset_online_cpus(&tr->max_buffer);
#endif
+ if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
+ struct trace_scratch *tscratch = tr->scratch;
+
+ tscratch->clock_id = i;
+ }
+
mutex_unlock(&trace_types_lock);
return 0;
@@ -9628,6 +9636,14 @@ static void setup_trace_scratch(struct trace_array *tr,
/* Scan modules to make text delta for modules. */
module_for_each_mod(make_mod_delta, tr);
+
+ /* Set trace_clock as the same of the previous boot. */
+ if (tscratch->clock_id != tr->clock_id) {
+ if (tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
+ pr_info("the previous trace_clock info is not valid.");
+ goto reset;
+ }
+ }
return;
reset:
/* Invalid trace modules */
On Tue, 13 May 2025 09:31:27 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
>
> Record trace_clock information in the trace_scratch area and recover
> the trace_clock when boot, so that reader can docode the timestamp
> correctly.
> Note that since most trace_clocks records the timestamp in nano-
> seconds, this is not a bug. But some trace_clock, like counter and
> tsc will record the counter value. Only for those trace_clock user
> needs this information.
>
> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> ---
> Changes in v3:
> - Save clock_id instead of its name.
> Changes in v2:
> - instead of exposing it via last_boot_info, set the current
> trace_clock as the same clock we used in the last boot.
> ---
> kernel/trace/trace.c | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index cf51c30b137f..2a060c62d686 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -6066,6 +6066,7 @@ struct trace_mod_entry {
> };
>
> struct trace_scratch {
> + int clock_id;
Should be "unsigned int"
> unsigned long text_addr;
> unsigned long nr_entries;
> struct trace_mod_entry entries[];
> @@ -6181,6 +6182,7 @@ static void update_last_data(struct trace_array *tr)
> if (tr->scratch) {
> struct trace_scratch *tscratch = tr->scratch;
>
> + tscratch->clock_id = tr->clock_id;
> memset(tscratch->entries, 0,
> flex_array_size(tscratch, entries, tscratch->nr_entries));
> tscratch->nr_entries = 0;
> @@ -7403,6 +7405,12 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr)
> tracing_reset_online_cpus(&tr->max_buffer);
> #endif
>
> + if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
> + struct trace_scratch *tscratch = tr->scratch;
> +
> + tscratch->clock_id = i;
> + }
> +
> mutex_unlock(&trace_types_lock);
>
> return 0;
> @@ -9628,6 +9636,14 @@ static void setup_trace_scratch(struct trace_array *tr,
>
> /* Scan modules to make text delta for modules. */
> module_for_each_mod(make_mod_delta, tr);
> +
> + /* Set trace_clock as the same of the previous boot. */
> + if (tscratch->clock_id != tr->clock_id) {
> + if (tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
In case the tscratch->clock_id gets corrupted, we better make sure it
doesn't overflow:
if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
-- Steve
> + pr_info("the previous trace_clock info is not valid.");
> + goto reset;
> + }
> + }
> return;
> reset:
> /* Invalid trace modules */
On Tue, 13 May 2025 13:56:51 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:
> On Tue, 13 May 2025 09:31:27 +0900
> "Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
>
> > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> >
> > Record trace_clock information in the trace_scratch area and recover
> > the trace_clock when boot, so that reader can docode the timestamp
> > correctly.
> > Note that since most trace_clocks records the timestamp in nano-
> > seconds, this is not a bug. But some trace_clock, like counter and
> > tsc will record the counter value. Only for those trace_clock user
> > needs this information.
> >
> > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > ---
> > Changes in v3:
> > - Save clock_id instead of its name.
> > Changes in v2:
> > - instead of exposing it via last_boot_info, set the current
> > trace_clock as the same clock we used in the last boot.
> > ---
> > kernel/trace/trace.c | 16 ++++++++++++++++
> > 1 file changed, 16 insertions(+)
> >
> > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> > index cf51c30b137f..2a060c62d686 100644
> > --- a/kernel/trace/trace.c
> > +++ b/kernel/trace/trace.c
> > @@ -6066,6 +6066,7 @@ struct trace_mod_entry {
> > };
> >
> > struct trace_scratch {
> > + int clock_id;
>
> Should be "unsigned int"
OK.
>
> > unsigned long text_addr;
> > unsigned long nr_entries;
> > struct trace_mod_entry entries[];
> > @@ -6181,6 +6182,7 @@ static void update_last_data(struct trace_array *tr)
> > if (tr->scratch) {
> > struct trace_scratch *tscratch = tr->scratch;
> >
> > + tscratch->clock_id = tr->clock_id;
> > memset(tscratch->entries, 0,
> > flex_array_size(tscratch, entries, tscratch->nr_entries));
> > tscratch->nr_entries = 0;
> > @@ -7403,6 +7405,12 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr)
> > tracing_reset_online_cpus(&tr->max_buffer);
> > #endif
> >
> > + if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
> > + struct trace_scratch *tscratch = tr->scratch;
> > +
> > + tscratch->clock_id = i;
> > + }
> > +
> > mutex_unlock(&trace_types_lock);
> >
> > return 0;
> > @@ -9628,6 +9636,14 @@ static void setup_trace_scratch(struct trace_array *tr,
> >
> > /* Scan modules to make text delta for modules. */
> > module_for_each_mod(make_mod_delta, tr);
> > +
> > + /* Set trace_clock as the same of the previous boot. */
> > + if (tscratch->clock_id != tr->clock_id) {
> > + if (tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
>
> In case the tscratch->clock_id gets corrupted, we better make sure it
> doesn't overflow:
>
> if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
> tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
Good catch! BTW, don't we have any check about scratch area?
(e.g. adding a checksum)
Thank you,
>
> -- Steve
>
>
> > + pr_info("the previous trace_clock info is not valid.");
> > + goto reset;
> > + }
> > + }
> > return;
> > reset:
> > /* Invalid trace modules */
>
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
On Wed, 14 May 2025 10:14:31 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> > In case the tscratch->clock_id gets corrupted, we better make sure it
> > doesn't overflow:
> >
> > if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
> > tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
>
> Good catch! BTW, don't we have any check about scratch area?
> (e.g. adding a checksum)
We could, but it would require updating every time we update the
scratch area.
Currently, the scratch area is only considered valid if the ring buffer
is proven to be valid. But if this becomes a problem, we probably
should add a checksum or something.
-- Steve
© 2016 - 2026 Red Hat, Inc.