The timerlat_hist and timerlat_top commands duplicate the logic for
handling threshold actions. When a threshold is reached, both commands
stop the trace, perform actions, and restart the trace if configured to
continue.
Create a new helper function, timerlat_restart(), to centralize this
shared logic and avoid code duplication. This function now handles the
threshold actions and restarts the necessary trace instances.
Refactor timerlat_hist_main() and the timerlat_top main loops to call
the new helper. This makes the code cleaner and more maintainable.
Signed-off-by: Wander Lairson Costa <wander@redhat.com>
---
tools/tracing/rtla/src/timerlat.c | 31 ++++++++++++++++++++++++++
tools/tracing/rtla/src/timerlat.h | 9 ++++++++
tools/tracing/rtla/src/timerlat_hist.c | 19 ++++++++--------
tools/tracing/rtla/src/timerlat_top.c | 19 ++++++++--------
4 files changed, 60 insertions(+), 18 deletions(-)
diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c
index 56e0b8af041d7..50c7eb00fd6b7 100644
--- a/tools/tracing/rtla/src/timerlat.c
+++ b/tools/tracing/rtla/src/timerlat.c
@@ -22,6 +22,37 @@
static int dma_latency_fd = -1;
+/**
+ * timerlat_restart - handle threshold actions and optionally restart tracing
+ * @tool: pointer to the osnoise_tool instance containing trace contexts
+ * @params: timerlat parameters with threshold action configuration
+ *
+ * Return:
+ * RESTART_OK - Actions executed successfully and tracing restarted
+ * RESTART_STOP - Actions executed but 'continue' flag not set, stop tracing
+ * RESTART_ERROR - Failed to restart tracing after executing actions
+ */
+enum restart_result
+timerlat_restart(const struct osnoise_tool *tool, struct timerlat_params *params)
+{
+ actions_perform(¶ms->common.threshold_actions);
+
+ if (!params->common.threshold_actions.continue_flag)
+ /* continue flag not set, break */
+ return RESTART_STOP;
+
+ /* continue action reached, re-enable tracing */
+ if (tool->record && trace_instance_start(&tool->record->trace))
+ goto err;
+ if (tool->aa && trace_instance_start(&tool->aa->trace))
+ goto err;
+ return RESTART_OK;
+
+err:
+ err_msg("Error restarting trace\n");
+ return RESTART_ERROR;
+}
+
/*
* timerlat_apply_config - apply common configs to the initialized tool
*/
diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h
index fd6065f48bb7f..47a34bb443fa0 100644
--- a/tools/tracing/rtla/src/timerlat.h
+++ b/tools/tracing/rtla/src/timerlat.h
@@ -31,6 +31,15 @@ struct timerlat_params {
#define to_timerlat_params(ptr) container_of(ptr, struct timerlat_params, common)
+enum restart_result {
+ RESTART_OK,
+ RESTART_STOP,
+ RESTART_ERROR = -1,
+};
+
+enum restart_result
+timerlat_restart(const struct osnoise_tool *tool, struct timerlat_params *params);
+
int timerlat_apply_config(struct osnoise_tool *tool, struct timerlat_params *params);
int timerlat_main(int argc, char *argv[]);
int timerlat_enable(struct osnoise_tool *tool);
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 09a3da3f58630..f14fc56c5b4a5 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -1165,18 +1165,19 @@ static int timerlat_hist_bpf_main_loop(struct osnoise_tool *tool)
if (!stop_tracing) {
/* Threshold overflow, perform actions on threshold */
- actions_perform(¶ms->common.threshold_actions);
+ enum restart_result result;
- if (!params->common.threshold_actions.continue_flag)
- /* continue flag not set, break */
+ result = timerlat_restart(tool, params);
+ if (result == RESTART_STOP)
break;
- /* continue action reached, re-enable tracing */
- if (tool->record)
- trace_instance_start(&tool->record->trace);
- if (tool->aa)
- trace_instance_start(&tool->aa->trace);
- timerlat_bpf_restart_tracing();
+ if (result == RESTART_ERROR)
+ return -1;
+
+ if (timerlat_bpf_restart_tracing()) {
+ err_msg("Error restarting BPF trace\n");
+ return -1;
+ }
}
}
timerlat_bpf_detach();
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 7679820e72db5..d831a9e1818f4 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -921,18 +921,19 @@ timerlat_top_bpf_main_loop(struct osnoise_tool *tool)
if (wait_retval == 1) {
/* Stopping requested by tracer */
- actions_perform(¶ms->common.threshold_actions);
+ enum restart_result result;
- if (!params->common.threshold_actions.continue_flag)
- /* continue flag not set, break */
+ result = timerlat_restart(tool, params);
+ if (result == RESTART_STOP)
break;
- /* continue action reached, re-enable tracing */
- if (tool->record)
- trace_instance_start(&tool->record->trace);
- if (tool->aa)
- trace_instance_start(&tool->aa->trace);
- timerlat_bpf_restart_tracing();
+ if (result == RESTART_ERROR)
+ return -1;
+
+ if (timerlat_bpf_restart_tracing()) {
+ err_msg("Error restarting BPF trace\n");
+ return -1;
+ }
}
/* is there still any user-threads ? */
--
2.51.1
On Mon, 2025-11-17 at 15:41 -0300, Wander Lairson Costa wrote:
> +enum restart_result
> +timerlat_restart(const struct osnoise_tool *tool, struct timerlat_params *params)
> +{
> + actions_perform(¶ms->common.threshold_actions);
> +
> + if (!params->common.threshold_actions.continue_flag)
> + /* continue flag not set, break */
> + return RESTART_STOP;
> +
> + /* continue action reached, re-enable tracing */
> + if (tool->record && trace_instance_start(&tool->record->trace))
> + goto err;
> + if (tool->aa && trace_instance_start(&tool->aa->trace))
> + goto err;
> + return RESTART_OK;
> +
> +err:
> + err_msg("Error restarting trace\n");
> + return RESTART_ERROR;
> +}
The non-BPF functions in common.c have the same logic and should also
call this. This isn't timerlat-specific.
> diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
> index 09a3da3f58630..f14fc56c5b4a5 100644
> --- a/tools/tracing/rtla/src/timerlat_hist.c
> +++ b/tools/tracing/rtla/src/timerlat_hist.c
> @@ -1165,18 +1165,19 @@ static int timerlat_hist_bpf_main_loop(struct osnoise_tool *tool)
>
> if (!stop_tracing) {
> /* Threshold overflow, perform actions on threshold */
> - actions_perform(¶ms->common.threshold_actions);
> + enum restart_result result;
>
> - if (!params->common.threshold_actions.continue_flag)
> - /* continue flag not set, break */
> + result = timerlat_restart(tool, params);
> + if (result == RESTART_STOP)
> break;
>
> - /* continue action reached, re-enable tracing */
> - if (tool->record)
> - trace_instance_start(&tool->record->trace);
> - if (tool->aa)
> - trace_instance_start(&tool->aa->trace);
> - timerlat_bpf_restart_tracing();
> + if (result == RESTART_ERROR)
> + return -1;
Does it matter that we're not detaching on an error here? Is this
something that gets cleaned up automatically (and if so, why do we ever
need to do it explicitly)?
> +
> + if (timerlat_bpf_restart_tracing()) {
> + err_msg("Error restarting BPF trace\n");
> + return -1;
> + }
[insert rant about not being able to use exceptions in userspace code in
the year 2025]
-Crystal
On Mon, Nov 24, 2025 at 9:46 PM Crystal Wood <crwood@redhat.com> wrote:
>
> On Mon, 2025-11-17 at 15:41 -0300, Wander Lairson Costa wrote:
>
> > +enum restart_result
> > +timerlat_restart(const struct osnoise_tool *tool, struct timerlat_params *params)
> > +{
> > + actions_perform(¶ms->common.threshold_actions);
> > +
> > + if (!params->common.threshold_actions.continue_flag)
> > + /* continue flag not set, break */
> > + return RESTART_STOP;
> > +
> > + /* continue action reached, re-enable tracing */
> > + if (tool->record && trace_instance_start(&tool->record->trace))
> > + goto err;
> > + if (tool->aa && trace_instance_start(&tool->aa->trace))
> > + goto err;
> > + return RESTART_OK;
> > +
> > +err:
> > + err_msg("Error restarting trace\n");
> > + return RESTART_ERROR;
> > +}
>
> The non-BPF functions in common.c have the same logic and should also
> call this. This isn't timerlat-specific.
>
I will replace them here, thanks.
>
> > diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
> > index 09a3da3f58630..f14fc56c5b4a5 100644
> > --- a/tools/tracing/rtla/src/timerlat_hist.c
> > +++ b/tools/tracing/rtla/src/timerlat_hist.c
> > @@ -1165,18 +1165,19 @@ static int timerlat_hist_bpf_main_loop(struct osnoise_tool *tool)
> >
> > if (!stop_tracing) {
> > /* Threshold overflow, perform actions on threshold */
> > - actions_perform(¶ms->common.threshold_actions);
> > + enum restart_result result;
> >
> > - if (!params->common.threshold_actions.continue_flag)
> > - /* continue flag not set, break */
> > + result = timerlat_restart(tool, params);
> > + if (result == RESTART_STOP)
> > break;
> >
> > - /* continue action reached, re-enable tracing */
> > - if (tool->record)
> > - trace_instance_start(&tool->record->trace);
> > - if (tool->aa)
> > - trace_instance_start(&tool->aa->trace);
> > - timerlat_bpf_restart_tracing();
> > + if (result == RESTART_ERROR)
> > + return -1;
>
> Does it matter that we're not detaching on an error here? Is this
> something that gets cleaned up automatically (and if so, why do we ever
> need to do it explicitly)?
>
On process exit, it does.
> > +
> > + if (timerlat_bpf_restart_tracing()) {
> > + err_msg("Error restarting BPF trace\n");
> > + return -1;
> > + }
>
> [insert rant about not being able to use exceptions in userspace code in
> the year 2025]
>
I actually find exceptions an anti-pattern. Modern languages like Zig,
Go and Rust came back to error returning.
> -Crystal
>
On Tue, 2025-11-25 at 11:20 -0300, Wander Lairson Costa wrote:
> On Mon, Nov 24, 2025 at 9:46 PM Crystal Wood <crwood@redhat.com> wrote:
> >
> > On Mon, 2025-11-17 at 15:41 -0300, Wander Lairson Costa wrote:
>
> > > +
> > > + if (timerlat_bpf_restart_tracing()) {
> > > + err_msg("Error restarting BPF trace\n");
> > > + return -1;
> > > + }
> >
> > [insert rant about not being able to use exceptions in userspace code in
> > the year 2025]
> >
>
> I actually find exceptions an anti-pattern. Modern languages like Zig,
> Go and Rust came back to error returning.
Maybe I'm behind the times, but I see exceptions and error returns as
complementary... not everything should be an exception and I can
certainly see how they could be overused in an anti-pattern way, but
they're nice for getting useful information out rather than "something
failed" without having to add a bunch of debug prints.
-Crystal
On Tue, Nov 25, 2025 at 2:36 PM Crystal Wood <crwood@redhat.com> wrote:
>
> On Tue, 2025-11-25 at 11:20 -0300, Wander Lairson Costa wrote:
> > On Mon, Nov 24, 2025 at 9:46 PM Crystal Wood <crwood@redhat.com> wrote:
> > >
> > > On Mon, 2025-11-17 at 15:41 -0300, Wander Lairson Costa wrote:
> >
> > > > +
> > > > + if (timerlat_bpf_restart_tracing()) {
> > > > + err_msg("Error restarting BPF trace\n");
> > > > + return -1;
> > > > + }
> > >
> > > [insert rant about not being able to use exceptions in userspace code in
> > > the year 2025]
> > >
> >
> > I actually find exceptions an anti-pattern. Modern languages like Zig,
> > Go and Rust came back to error returning.
>
> Maybe I'm behind the times, but I see exceptions and error returns as
> complementary... not everything should be an exception and I can
> certainly see how they could be overused in an anti-pattern way, but
> they're nice for getting useful information out rather than "something
> failed" without having to add a bunch of debug prints.
>
IIRC, you do get stack trace from the error returns. I think Golang
didn't/don't by
default. Exception as an alias for "panic()" is fine, IMO. What I've
seen in production
is services down due to unhandled exceptions. One of the reasons is
that there is nothing
in the function signature that says if or what of exceptions it raises.
> -Crystal
>
© 2016 - 2025 Red Hat, Inc.