[PATCH v2] perf record: Refactor ARM64 leaf caller setup out of arch

Ian Rogers posted 1 patch 1 week, 6 days ago
tools/perf/arch/arm64/util/Build                     |  1 -
tools/perf/arch/arm64/util/machine.c                 | 12 ------------
tools/perf/builtin-record.c                          | 11 +++++------
tools/perf/util/arm64-frame-pointer-unwind-support.c |  6 ++++++
tools/perf/util/arm64-frame-pointer-unwind-support.h |  2 ++
tools/perf/util/callchain.h                          |  2 --
6 files changed, 13 insertions(+), 21 deletions(-)
delete mode 100644 tools/perf/arch/arm64/util/machine.c
[PATCH v2] perf record: Refactor ARM64 leaf caller setup out of arch
Posted by Ian Rogers 1 week, 6 days ago
Code in tools/perf/arch causes portability issues/opaqueness and LTO
issues due to the use of weak symbols. Move the adding of LR to the
sample_user_regs into arm64-frame-pointer-unwind-support.c conditional
on EM_HOST == EM_AARCH64 (false on all non-ARM64 builds). This also
better encapsulates the use of the sampled registers by
get_leaf_frame_caller_aarch64 and the setup by the new
add_leaf_frame_caller_opts_aarch64, exposing opportunities for
possibly sampling PC and SP to help the unwinder.

Reviewed-by: James Clark <james.clark@linaro.org>
Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/arch/arm64/util/Build                     |  1 -
 tools/perf/arch/arm64/util/machine.c                 | 12 ------------
 tools/perf/builtin-record.c                          | 11 +++++------
 tools/perf/util/arm64-frame-pointer-unwind-support.c |  6 ++++++
 tools/perf/util/arm64-frame-pointer-unwind-support.h |  2 ++
 tools/perf/util/callchain.h                          |  2 --
 6 files changed, 13 insertions(+), 21 deletions(-)
 delete mode 100644 tools/perf/arch/arm64/util/machine.c

diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 4e06a08d281a..638aa6948ab5 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -5,7 +5,6 @@ perf-util-y += ../../arm/util/pmu.o
 perf-util-y += arm-spe.o
 perf-util-y += header.o
 perf-util-y += hisi-ptt.o
-perf-util-y += machine.o
 perf-util-y += mem-events.o
 perf-util-y += pmu.o
 perf-util-y += tsc.o
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
deleted file mode 100644
index 80fb13c958d9..000000000000
--- a/tools/perf/arch/arm64/util/machine.c
+++ /dev/null
@@ -1,12 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include "callchain.h" // prototype of arch__add_leaf_frame_record_opts
-#include "perf_regs.h"
-#include "record.h"
-
-#define SMPL_REG_MASK(b) (1ULL << (b))
-
-void arch__add_leaf_frame_record_opts(struct record_opts *opts)
-{
-	opts->sample_user_regs |= SMPL_REG_MASK(PERF_REG_ARM64_LR);
-}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4a5eba498c02..272bba7f4b9e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -14,6 +14,7 @@
 #include "util/parse-events.h"
 #include "util/config.h"
 
+#include "util/arm64-frame-pointer-unwind-support.h"
 #include "util/callchain.h"
 #include "util/cgroup.h"
 #include "util/header.h"
@@ -3230,10 +3231,6 @@ static int record__parse_off_cpu_thresh(const struct option *opt,
 	return 0;
 }
 
-void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
-{
-}
-
 static int parse_control_option(const struct option *opt,
 				const char *str,
 				int unset __maybe_unused)
@@ -4319,8 +4316,10 @@ int cmd_record(int argc, const char **argv)
 
 	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
 
-	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
-		arch__add_leaf_frame_record_opts(&rec->opts);
+	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) {
+		if (EM_HOST == EM_AARCH64)
+			add_leaf_frame_caller_opts_aarch64(&rec->opts);
+	}
 
 	err = -ENOMEM;
 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
index 858ce2b01812..3af8c7a466e0 100644
--- a/tools/perf/util/arm64-frame-pointer-unwind-support.c
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
@@ -2,6 +2,7 @@
 #include "arm64-frame-pointer-unwind-support.h"
 #include "callchain.h"
 #include "event.h"
+#include "record.h"
 #include "unwind.h"
 #include <string.h>
 
@@ -16,6 +17,11 @@ struct entries {
 
 #define SMPL_REG_MASK(b) (1ULL << (b))
 
+void add_leaf_frame_caller_opts_aarch64(struct record_opts *opts)
+{
+	opts->sample_user_regs |= SMPL_REG_MASK(PERF_REG_ARM64_LR);
+}
+
 static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
 {
 	struct regs_dump *regs;
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h
index 42d3a45490f5..ba35b295bfcd 100644
--- a/tools/perf/util/arm64-frame-pointer-unwind-support.h
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h
@@ -5,8 +5,10 @@
 #include <linux/types.h>
 
 struct perf_sample;
+struct record_opts;
 struct thread;
 
+void add_leaf_frame_caller_opts_aarch64(struct record_opts *opts);
 u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
 
 #endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 06d463ccc7a0..b7702d65ad60 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -277,8 +277,6 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
 }
 #endif
 
-void arch__add_leaf_frame_record_opts(struct record_opts *opts);
-
 char *callchain_list__sym_name(struct callchain_list *cl,
 			       char *bf, size_t bfsize, bool show_dso);
 char *callchain_node__scnprintf_value(struct callchain_node *node,
-- 
2.54.0.563.g4f69b47b94-goog
Re: [PATCH v2] perf record: Refactor ARM64 leaf caller setup out of arch
Posted by Ian Rogers 1 week, 4 days ago
On Mon, May 11, 2026 at 10:41 PM Ian Rogers <irogers@google.com> wrote:
>
> Code in tools/perf/arch causes portability issues/opaqueness and LTO
> issues due to the use of weak symbols. Move the adding of LR to the
> sample_user_regs into arm64-frame-pointer-unwind-support.c conditional
> on EM_HOST == EM_AARCH64 (false on all non-ARM64 builds). This also
> better encapsulates the use of the sampled registers by
> get_leaf_frame_caller_aarch64 and the setup by the new
> add_leaf_frame_caller_opts_aarch64, exposing opportunities for
> possibly sampling PC and SP to help the unwinder.
>
> Reviewed-by: James Clark <james.clark@linaro.org>
> Signed-off-by: Ian Rogers <irogers@google.com>

Ping.

Thanks,
Ian

> ---
>  tools/perf/arch/arm64/util/Build                     |  1 -
>  tools/perf/arch/arm64/util/machine.c                 | 12 ------------
>  tools/perf/builtin-record.c                          | 11 +++++------
>  tools/perf/util/arm64-frame-pointer-unwind-support.c |  6 ++++++
>  tools/perf/util/arm64-frame-pointer-unwind-support.h |  2 ++
>  tools/perf/util/callchain.h                          |  2 --
>  6 files changed, 13 insertions(+), 21 deletions(-)
>  delete mode 100644 tools/perf/arch/arm64/util/machine.c
>
> diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
> index 4e06a08d281a..638aa6948ab5 100644
> --- a/tools/perf/arch/arm64/util/Build
> +++ b/tools/perf/arch/arm64/util/Build
> @@ -5,7 +5,6 @@ perf-util-y += ../../arm/util/pmu.o
>  perf-util-y += arm-spe.o
>  perf-util-y += header.o
>  perf-util-y += hisi-ptt.o
> -perf-util-y += machine.o
>  perf-util-y += mem-events.o
>  perf-util-y += pmu.o
>  perf-util-y += tsc.o
> diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
> deleted file mode 100644
> index 80fb13c958d9..000000000000
> --- a/tools/perf/arch/arm64/util/machine.c
> +++ /dev/null
> @@ -1,12 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -
> -#include "callchain.h" // prototype of arch__add_leaf_frame_record_opts
> -#include "perf_regs.h"
> -#include "record.h"
> -
> -#define SMPL_REG_MASK(b) (1ULL << (b))
> -
> -void arch__add_leaf_frame_record_opts(struct record_opts *opts)
> -{
> -       opts->sample_user_regs |= SMPL_REG_MASK(PERF_REG_ARM64_LR);
> -}
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 4a5eba498c02..272bba7f4b9e 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -14,6 +14,7 @@
>  #include "util/parse-events.h"
>  #include "util/config.h"
>
> +#include "util/arm64-frame-pointer-unwind-support.h"
>  #include "util/callchain.h"
>  #include "util/cgroup.h"
>  #include "util/header.h"
> @@ -3230,10 +3231,6 @@ static int record__parse_off_cpu_thresh(const struct option *opt,
>         return 0;
>  }
>
> -void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
> -{
> -}
> -
>  static int parse_control_option(const struct option *opt,
>                                 const char *str,
>                                 int unset __maybe_unused)
> @@ -4319,8 +4316,10 @@ int cmd_record(int argc, const char **argv)
>
>         evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
>
> -       if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
> -               arch__add_leaf_frame_record_opts(&rec->opts);
> +       if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) {
> +               if (EM_HOST == EM_AARCH64)
> +                       add_leaf_frame_caller_opts_aarch64(&rec->opts);
> +       }
>
>         err = -ENOMEM;
>         if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
> diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c
> index 858ce2b01812..3af8c7a466e0 100644
> --- a/tools/perf/util/arm64-frame-pointer-unwind-support.c
> +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c
> @@ -2,6 +2,7 @@
>  #include "arm64-frame-pointer-unwind-support.h"
>  #include "callchain.h"
>  #include "event.h"
> +#include "record.h"
>  #include "unwind.h"
>  #include <string.h>
>
> @@ -16,6 +17,11 @@ struct entries {
>
>  #define SMPL_REG_MASK(b) (1ULL << (b))
>
> +void add_leaf_frame_caller_opts_aarch64(struct record_opts *opts)
> +{
> +       opts->sample_user_regs |= SMPL_REG_MASK(PERF_REG_ARM64_LR);
> +}
> +
>  static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
>  {
>         struct regs_dump *regs;
> diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h
> index 42d3a45490f5..ba35b295bfcd 100644
> --- a/tools/perf/util/arm64-frame-pointer-unwind-support.h
> +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h
> @@ -5,8 +5,10 @@
>  #include <linux/types.h>
>
>  struct perf_sample;
> +struct record_opts;
>  struct thread;
>
> +void add_leaf_frame_caller_opts_aarch64(struct record_opts *opts);
>  u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
>
>  #endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 06d463ccc7a0..b7702d65ad60 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -277,8 +277,6 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
>  }
>  #endif
>
> -void arch__add_leaf_frame_record_opts(struct record_opts *opts);
> -
>  char *callchain_list__sym_name(struct callchain_list *cl,
>                                char *bf, size_t bfsize, bool show_dso);
>  char *callchain_node__scnprintf_value(struct callchain_node *node,
> --
> 2.54.0.563.g4f69b47b94-goog
>
Re: [PATCH v2] perf record: Refactor ARM64 leaf caller setup out of arch
Posted by Arnaldo Carvalho de Melo 1 week, 4 days ago
On Thu, May 14, 2026 at 10:38:07AM -0700, Ian Rogers wrote:
> On Mon, May 11, 2026 at 10:41 PM Ian Rogers <irogers@google.com> wrote:
> >
> > Code in tools/perf/arch causes portability issues/opaqueness and LTO
> > issues due to the use of weak symbols. Move the adding of LR to the
> > sample_user_regs into arm64-frame-pointer-unwind-support.c conditional
> > on EM_HOST == EM_AARCH64 (false on all non-ARM64 builds). This also
> > better encapsulates the use of the sampled registers by
> > get_leaf_frame_caller_aarch64 and the setup by the new
> > add_leaf_frame_caller_opts_aarch64, exposing opportunities for
> > possibly sampling PC and SP to help the unwinder.
> >
> > Reviewed-by: James Clark <james.clark@linaro.org>
> > Signed-off-by: Ian Rogers <irogers@google.com>
> 
> Ping.

Thanks, applied to perf-tools-next, for v7.2.

- Arnaldo