tools/perf/builtin-top.c | 6 +- tools/perf/util/symbol-elf.c | 2 +- tools/perf/util/symbol.c | 105 ++++++++++++++++++++++------------- tools/perf/util/symbol.h | 15 +++-- 4 files changed, 84 insertions(+), 44 deletions(-)
Move the idle boolean to a helper symbol__is_idle function. In the
function lazily compute whether a symbol is an idle function taking
into consideration the kernel version and architecture of the
machine. As symbols__insert no longer needs to know if a symbol is for
the kernel, remove the argument.
This change is inspired by mailing list discussion, particularly from
Thomas Richter <tmricht@linux.ibm.com> and Heiko Carstens
<hca@linux.ibm.com>:
https://lore.kernel.org/lkml/20260219113850.354271-1-tmricht@linux.ibm.com/
The change switches x86 matches to use strstarts which means
intel_idle_irq is matched as part of strstarts(name, "intel_idle"), a
change suggested by Honglei Wang <jameshongleiwang@126.com> in:
https://lore.kernel.org/lkml/20260323085255.98173-1-jameshongleiwang@126.com/
Signed-off-by: Ian Rogers <irogers@google.com>
---
v1: https://lore.kernel.org/lkml/20260302234343.564937-1-irogers@google.com/
---
tools/perf/builtin-top.c | 6 +-
tools/perf/util/symbol-elf.c | 2 +-
tools/perf/util/symbol.c | 105 ++++++++++++++++++++++-------------
tools/perf/util/symbol.h | 15 +++--
4 files changed, 84 insertions(+), 44 deletions(-)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 37950efb28ac..bdc1c761cd61 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -751,6 +751,7 @@ static void perf_event__process_sample(const struct perf_tool *tool,
{
struct perf_top *top = container_of(tool, struct perf_top, tool);
struct addr_location al;
+ struct dso *dso = NULL;
if (!machine && perf_guest) {
static struct intlist *seen;
@@ -830,7 +831,10 @@ static void perf_event__process_sample(const struct perf_tool *tool,
}
}
- if (al.sym == NULL || !al.sym->idle) {
+ if (al.map)
+ dso = map__dso(al.map);
+
+ if (al.sym == NULL || !symbol__is_idle(al.sym, dso, machine->env)) {
struct hists *hists = evsel__hists(evsel);
struct hist_entry_iter iter = {
.evsel = evsel,
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 3cd4e5a03cc5..9fabf5146d89 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1723,7 +1723,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
arch__sym_update(f, &sym);
- __symbols__insert(dso__symbols(curr_dso), f, dso__kernel(dso));
+ __symbols__insert(dso__symbols(curr_dso), f);
nr++;
}
dso__put(curr_dso);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ce9195717f44..1a357af93a0a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -25,6 +25,8 @@
#include "demangle-ocaml.h"
#include "demangle-rust-v0.h"
#include "dso.h"
+#include "dwarf-regs.h"
+#include "env.h"
#include "util.h" // lsdir()
#include "event.h"
#include "machine.h"
@@ -50,7 +52,6 @@
static int dso__load_kernel_sym(struct dso *dso, struct map *map);
static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
-static bool symbol__is_idle(const char *name);
int vmlinux_path__nr_entries;
char **vmlinux_path;
@@ -357,8 +358,7 @@ void symbols__delete(struct rb_root_cached *symbols)
}
}
-void __symbols__insert(struct rb_root_cached *symbols,
- struct symbol *sym, bool kernel)
+void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
{
struct rb_node **p = &symbols->rb_root.rb_node;
struct rb_node *parent = NULL;
@@ -366,17 +366,6 @@ void __symbols__insert(struct rb_root_cached *symbols,
struct symbol *s;
bool leftmost = true;
- if (kernel) {
- const char *name = sym->name;
- /*
- * ppc64 uses function descriptors and appends a '.' to the
- * start of every instruction address. Remove it.
- */
- if (name[0] == '.')
- name++;
- sym->idle = symbol__is_idle(name);
- }
-
while (*p != NULL) {
parent = *p;
s = rb_entry(parent, struct symbol, rb_node);
@@ -393,7 +382,7 @@ void __symbols__insert(struct rb_root_cached *symbols,
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
{
- __symbols__insert(symbols, sym, false);
+ __symbols__insert(symbols, sym);
}
static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip)
@@ -554,7 +543,7 @@ void dso__reset_find_symbol_cache(struct dso *dso)
void dso__insert_symbol(struct dso *dso, struct symbol *sym)
{
- __symbols__insert(dso__symbols(dso), sym, dso__kernel(dso));
+ __symbols__insert(dso__symbols(dso), sym);
/* update the symbol cache if necessary */
if (dso__last_find_result_addr(dso) >= sym->start &&
@@ -716,47 +705,87 @@ int modules__parse(const char *filename, void *arg,
return err;
}
+static int sym_name_cmp(const void *a, const void *b)
+{
+ const char *name = a;
+ const char *const *sym = b;
+
+ return strcmp(name, *sym);
+}
+
/*
* These are symbols in the kernel image, so make sure that
* sym is from a kernel DSO.
*/
-static bool symbol__is_idle(const char *name)
+bool symbol__is_idle(struct symbol *sym, const struct dso *dso, const struct perf_env *env)
{
- const char * const idle_symbols[] = {
+ static const char * const idle_symbols[] = {
"acpi_idle_do_entry",
"acpi_processor_ffh_cstate_enter",
"arch_cpu_idle",
"cpu_idle",
"cpu_startup_entry",
- "idle_cpu",
- "intel_idle",
- "intel_idle_ibrs",
"default_idle",
- "native_safe_halt",
"enter_idle",
"exit_idle",
- "mwait_idle",
- "mwait_idle_with_hints",
- "mwait_idle_with_hints.constprop.0",
+ "idle_cpu",
+ "native_safe_halt",
"poll_idle",
- "ppc64_runlatch_off",
"pseries_dedicated_idle_sleep",
- "psw_idle",
- "psw_idle_exit",
- NULL
};
- int i;
- static struct strlist *idle_symbols_list;
+ const char *name = sym->name;
+ uint16_t e_machine = env ? env->e_machine : EM_HOST;
- if (idle_symbols_list)
- return strlist__has_entry(idle_symbols_list, name);
+ if (sym->idle)
+ return sym->idle == SYMBOL_IDLE__IDLE;
- idle_symbols_list = strlist__new(NULL, NULL);
+ if (!dso || dso__kernel(dso) == DSO_SPACE__USER) {
+ sym->idle = SYMBOL_IDLE__NOT_IDLE;
+ return false;
+ }
- for (i = 0; idle_symbols[i]; i++)
- strlist__add(idle_symbols_list, idle_symbols[i]);
+ /*
+ * ppc64 uses function descriptors and appends a '.' to the
+ * start of every instruction address. Remove it.
+ */
+ if (name[0] == '.')
+ name++;
- return strlist__has_entry(idle_symbols_list, name);
+ if (bsearch(name, idle_symbols, ARRAY_SIZE(idle_symbols),
+ sizeof(idle_symbols[0]), sym_name_cmp)) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+
+ if (e_machine == EM_386 || e_machine == EM_X86_64) {
+ if (strstarts(name, "mwait_idle") ||
+ strstarts(name, "intel_idle")) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+ }
+
+ if (e_machine == EM_PPC64 && !strcmp(name, "ppc64_runlatch_off")) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+
+ if (e_machine == EM_S390) {
+ int major = 0, minor = 0;
+ const char *release = env && env->os_release
+ ? env->os_release : perf_version_string;
+
+ sscanf(release, "%d.%d", &major, &minor);
+
+ /* Before v6.10, s390 used psw_idle. */
+ if ((major < 6 || (major == 6 && minor < 10)) && strstarts(name, "psw_idle")) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+ }
+
+ sym->idle = SYMBOL_IDLE__NOT_IDLE;
+ return false;
}
static int map__process_kallsym_symbol(void *arg, const char *name,
@@ -785,7 +814,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
* We will pass the symbols to the filter later, in
* map__split_kallsyms, when we have split the maps per module
*/
- __symbols__insert(root, sym, !strchr(name, '['));
+ __symbols__insert(root, sym);
return 0;
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index c67814d6d6d6..f26f67bd7982 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -25,6 +25,7 @@ struct dso;
struct map;
struct maps;
struct option;
+struct perf_env;
struct build_id;
/*
@@ -42,6 +43,12 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
GElf_Shdr *shp, const char *name, size_t *idx);
#endif
+enum symbol_idle_kind {
+ SYMBOL_IDLE__UNKNOWN = 0,
+ SYMBOL_IDLE__NOT_IDLE = 1,
+ SYMBOL_IDLE__IDLE = 2,
+};
+
/**
* A symtab entry. When allocated this may be preceded by an annotation (see
* symbol__annotation) and/or a browser_index (see symbol__browser_index).
@@ -57,8 +64,8 @@ struct symbol {
u8 type:4;
/** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
u8 binding:4;
- /** Set true for kernel symbols of idle routines. */
- u8 idle:1;
+ /** Cache for symbol__is_idle. */
+ enum symbol_idle_kind idle:2;
/** Resolvable but tools ignore it (e.g. idle routines). */
u8 ignore:1;
/** Symbol for an inlined function. */
@@ -202,8 +209,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
-void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
- bool kernel);
+void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__fixup_duplicate(struct rb_root_cached *symbols);
void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
@@ -286,5 +292,6 @@ enum {
};
int symbol__validate_sym_arguments(void);
+bool symbol__is_idle(struct symbol *sym, const struct dso *dso, const struct perf_env *env);
#endif /* __PERF_SYMBOL */
--
2.53.0.1018.g2bb0e51243-goog
Hi Ian,
On 3/26/26 12:18 AM, Ian Rogers wrote:
> Move the idle boolean to a helper symbol__is_idle function. In the
> function lazily compute whether a symbol is an idle function taking
> into consideration the kernel version and architecture of the
> machine. As symbols__insert no longer needs to know if a symbol is for
> the kernel, remove the argument.
>
> This change is inspired by mailing list discussion, particularly from
> Thomas Richter <tmricht@linux.ibm.com> and Heiko Carstens
> <hca@linux.ibm.com>:
> https://lore.kernel.org/lkml/20260219113850.354271-1-tmricht@linux.ibm.com/
>
> The change switches x86 matches to use strstarts which means
> intel_idle_irq is matched as part of strstarts(name, "intel_idle"), a
> change suggested by Honglei Wang <jameshongleiwang@126.com> in:
> https://lore.kernel.org/lkml/20260323085255.98173-1-jameshongleiwang@126.com/
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> v1: https://lore.kernel.org/lkml/20260302234343.564937-1-irogers@google.com/
> ---
> tools/perf/builtin-top.c | 6 +-
> tools/perf/util/symbol-elf.c | 2 +-
> tools/perf/util/symbol.c | 105 ++++++++++++++++++++++-------------
> tools/perf/util/symbol.h | 15 +++--
> 4 files changed, 84 insertions(+), 44 deletions(-)
>
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 37950efb28ac..bdc1c761cd61 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -751,6 +751,7 @@ static void perf_event__process_sample(const struct perf_tool *tool,
> {
> struct perf_top *top = container_of(tool, struct perf_top, tool);
> struct addr_location al;
> + struct dso *dso = NULL;
>
> if (!machine && perf_guest) {
> static struct intlist *seen;
> @@ -830,7 +831,10 @@ static void perf_event__process_sample(const struct perf_tool *tool,
> }
> }
>
> - if (al.sym == NULL || !al.sym->idle) {
> + if (al.map)
> + dso = map__dso(al.map);
> +
> + if (al.sym == NULL || !symbol__is_idle(al.sym, dso, machine->env)) {
> struct hists *hists = evsel__hists(evsel);
> struct hist_entry_iter iter = {
> .evsel = evsel,
> diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
> index 3cd4e5a03cc5..9fabf5146d89 100644
> --- a/tools/perf/util/symbol-elf.c
> +++ b/tools/perf/util/symbol-elf.c
> @@ -1723,7 +1723,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
>
> arch__sym_update(f, &sym);
>
> - __symbols__insert(dso__symbols(curr_dso), f, dso__kernel(dso));
> + __symbols__insert(dso__symbols(curr_dso), f);
> nr++;
> }
> dso__put(curr_dso);
> diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
> index ce9195717f44..1a357af93a0a 100644
> --- a/tools/perf/util/symbol.c
> +++ b/tools/perf/util/symbol.c
> @@ -25,6 +25,8 @@
> #include "demangle-ocaml.h"
> #include "demangle-rust-v0.h"
> #include "dso.h"
> +#include "dwarf-regs.h"
> +#include "env.h"
> #include "util.h" // lsdir()
> #include "event.h"
> #include "machine.h"
> @@ -50,7 +52,6 @@
>
> static int dso__load_kernel_sym(struct dso *dso, struct map *map);
> static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
> -static bool symbol__is_idle(const char *name);
>
> int vmlinux_path__nr_entries;
> char **vmlinux_path;
> @@ -357,8 +358,7 @@ void symbols__delete(struct rb_root_cached *symbols)
> }
> }
>
> -void __symbols__insert(struct rb_root_cached *symbols,
> - struct symbol *sym, bool kernel)
> +void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
> {
> struct rb_node **p = &symbols->rb_root.rb_node;
> struct rb_node *parent = NULL;
> @@ -366,17 +366,6 @@ void __symbols__insert(struct rb_root_cached *symbols,
> struct symbol *s;
> bool leftmost = true;
>
> - if (kernel) {
> - const char *name = sym->name;
> - /*
> - * ppc64 uses function descriptors and appends a '.' to the
> - * start of every instruction address. Remove it.
> - */
> - if (name[0] == '.')
> - name++;
> - sym->idle = symbol__is_idle(name);
> - }
> -
> while (*p != NULL) {
> parent = *p;
> s = rb_entry(parent, struct symbol, rb_node);
> @@ -393,7 +382,7 @@ void __symbols__insert(struct rb_root_cached *symbols,
>
> void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
> {
> - __symbols__insert(symbols, sym, false);
> + __symbols__insert(symbols, sym);
> }
>
> static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip)
> @@ -554,7 +543,7 @@ void dso__reset_find_symbol_cache(struct dso *dso)
>
> void dso__insert_symbol(struct dso *dso, struct symbol *sym)
> {
> - __symbols__insert(dso__symbols(dso), sym, dso__kernel(dso));
> + __symbols__insert(dso__symbols(dso), sym);
>
> /* update the symbol cache if necessary */
> if (dso__last_find_result_addr(dso) >= sym->start &&
> @@ -716,47 +705,87 @@ int modules__parse(const char *filename, void *arg,
> return err;
> }
>
> +static int sym_name_cmp(const void *a, const void *b)
> +{
> + const char *name = a;
> + const char *const *sym = b;
> +
> + return strcmp(name, *sym);
> +}
> +
> /*
> * These are symbols in the kernel image, so make sure that
> * sym is from a kernel DSO.
> */
> -static bool symbol__is_idle(const char *name)
> +bool symbol__is_idle(struct symbol *sym, const struct dso *dso, const struct perf_env *env)
> {
> - const char * const idle_symbols[] = {
> + static const char * const idle_symbols[] = {
> "acpi_idle_do_entry",
> "acpi_processor_ffh_cstate_enter",
> "arch_cpu_idle",
> "cpu_idle",
> "cpu_startup_entry",
> - "idle_cpu",
> - "intel_idle",
> - "intel_idle_ibrs",
> "default_idle",
> - "native_safe_halt",
> "enter_idle",
> "exit_idle",
> - "mwait_idle",
> - "mwait_idle_with_hints",
> - "mwait_idle_with_hints.constprop.0",
> + "idle_cpu",
> + "native_safe_halt",
> "poll_idle",
> - "ppc64_runlatch_off",
> "pseries_dedicated_idle_sleep",
> - "psw_idle",
> - "psw_idle_exit",
> - NULL
> };
> - int i;
> - static struct strlist *idle_symbols_list;
> + const char *name = sym->name;
> + uint16_t e_machine = env ? env->e_machine : EM_HOST;
>
> - if (idle_symbols_list)
> - return strlist__has_entry(idle_symbols_list, name);
> + if (sym->idle)
> + return sym->idle == SYMBOL_IDLE__IDLE;
>
> - idle_symbols_list = strlist__new(NULL, NULL);
> + if (!dso || dso__kernel(dso) == DSO_SPACE__USER) {
> + sym->idle = SYMBOL_IDLE__NOT_IDLE;
> + return false;
> + }
>
> - for (i = 0; idle_symbols[i]; i++)
> - strlist__add(idle_symbols_list, idle_symbols[i]);
> + /*
> + * ppc64 uses function descriptors and appends a '.' to the
> + * start of every instruction address. Remove it.
> + */
> + if (name[0] == '.')
> + name++;
>
> - return strlist__has_entry(idle_symbols_list, name);
> + if (bsearch(name, idle_symbols, ARRAY_SIZE(idle_symbols),
> + sizeof(idle_symbols[0]), sym_name_cmp)) {
> + sym->idle = SYMBOL_IDLE__IDLE;
> + return true;
> + }
> +
> + if (e_machine == EM_386 || e_machine == EM_X86_64) {
As said in anther thread, intel_idle_irq was still there on my test
machine. I did a bit debug and found e_machine == 0 so it couldn't run
into this branch. After dig more, it should be
deliver_event()->perf_session__find_machine() return a struct machine
whose env->e_machine is 0. I'm still busy today to do more, wish this
clue can help.
Thanks,
Honglei
> + if (strstarts(name, "mwait_idle") ||
> + strstarts(name, "intel_idle")) {
> + sym->idle = SYMBOL_IDLE__IDLE;
> + return true;
> + }
> + }
> +
> + if (e_machine == EM_PPC64 && !strcmp(name, "ppc64_runlatch_off")) {
> + sym->idle = SYMBOL_IDLE__IDLE;
> + return true;
> + }
> +
> + if (e_machine == EM_S390) {
> + int major = 0, minor = 0;
> + const char *release = env && env->os_release
> + ? env->os_release : perf_version_string;
> +
> + sscanf(release, "%d.%d", &major, &minor);
> +
> + /* Before v6.10, s390 used psw_idle. */
> + if ((major < 6 || (major == 6 && minor < 10)) && strstarts(name, "psw_idle")) {
> + sym->idle = SYMBOL_IDLE__IDLE;
> + return true;
> + }
> + }
> +
> + sym->idle = SYMBOL_IDLE__NOT_IDLE;
> + return false;
> }
>
> static int map__process_kallsym_symbol(void *arg, const char *name,
> @@ -785,7 +814,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
> * We will pass the symbols to the filter later, in
> * map__split_kallsyms, when we have split the maps per module
> */
> - __symbols__insert(root, sym, !strchr(name, '['));
> + __symbols__insert(root, sym);
>
> return 0;
> }
> diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
> index c67814d6d6d6..f26f67bd7982 100644
> --- a/tools/perf/util/symbol.h
> +++ b/tools/perf/util/symbol.h
> @@ -25,6 +25,7 @@ struct dso;
> struct map;
> struct maps;
> struct option;
> +struct perf_env;
> struct build_id;
>
> /*
> @@ -42,6 +43,12 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
> GElf_Shdr *shp, const char *name, size_t *idx);
> #endif
>
> +enum symbol_idle_kind {
> + SYMBOL_IDLE__UNKNOWN = 0,
> + SYMBOL_IDLE__NOT_IDLE = 1,
> + SYMBOL_IDLE__IDLE = 2,
> +};
> +
> /**
> * A symtab entry. When allocated this may be preceded by an annotation (see
> * symbol__annotation) and/or a browser_index (see symbol__browser_index).
> @@ -57,8 +64,8 @@ struct symbol {
> u8 type:4;
> /** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
> u8 binding:4;
> - /** Set true for kernel symbols of idle routines. */
> - u8 idle:1;
> + /** Cache for symbol__is_idle. */
> + enum symbol_idle_kind idle:2;
> /** Resolvable but tools ignore it (e.g. idle routines). */
> u8 ignore:1;
> /** Symbol for an inlined function. */
> @@ -202,8 +209,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
>
> char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
>
> -void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
> - bool kernel);
> +void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
> void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
> void symbols__fixup_duplicate(struct rb_root_cached *symbols);
> void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
> @@ -286,5 +292,6 @@ enum {
> };
>
> int symbol__validate_sym_arguments(void);
> +bool symbol__is_idle(struct symbol *sym, const struct dso *dso, const struct perf_env *env);
>
> #endif /* __PERF_SYMBOL */
On Thu, Mar 26, 2026 at 12:20 AM Honglei Wang <jameshongleiwang@126.com> wrote:
>
> Hi Ian,
>
> On 3/26/26 12:18 AM, Ian Rogers wrote:
> > Move the idle boolean to a helper symbol__is_idle function. In the
> > function lazily compute whether a symbol is an idle function taking
> > into consideration the kernel version and architecture of the
> > machine. As symbols__insert no longer needs to know if a symbol is for
> > the kernel, remove the argument.
> >
> > This change is inspired by mailing list discussion, particularly from
> > Thomas Richter <tmricht@linux.ibm.com> and Heiko Carstens
> > <hca@linux.ibm.com>:
> > https://lore.kernel.org/lkml/20260219113850.354271-1-tmricht@linux.ibm.com/
> >
> > The change switches x86 matches to use strstarts which means
> > intel_idle_irq is matched as part of strstarts(name, "intel_idle"), a
> > change suggested by Honglei Wang <jameshongleiwang@126.com> in:
> > https://lore.kernel.org/lkml/20260323085255.98173-1-jameshongleiwang@126.com/
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > v1: https://lore.kernel.org/lkml/20260302234343.564937-1-irogers@google.com/
> > ---
> > tools/perf/builtin-top.c | 6 +-
> > tools/perf/util/symbol-elf.c | 2 +-
> > tools/perf/util/symbol.c | 105 ++++++++++++++++++++++-------------
> > tools/perf/util/symbol.h | 15 +++--
> > 4 files changed, 84 insertions(+), 44 deletions(-)
> >
> > diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> > index 37950efb28ac..bdc1c761cd61 100644
> > --- a/tools/perf/builtin-top.c
> > +++ b/tools/perf/builtin-top.c
> > @@ -751,6 +751,7 @@ static void perf_event__process_sample(const struct perf_tool *tool,
> > {
> > struct perf_top *top = container_of(tool, struct perf_top, tool);
> > struct addr_location al;
> > + struct dso *dso = NULL;
> >
> > if (!machine && perf_guest) {
> > static struct intlist *seen;
> > @@ -830,7 +831,10 @@ static void perf_event__process_sample(const struct perf_tool *tool,
> > }
> > }
> >
> > - if (al.sym == NULL || !al.sym->idle) {
> > + if (al.map)
> > + dso = map__dso(al.map);
> > +
> > + if (al.sym == NULL || !symbol__is_idle(al.sym, dso, machine->env)) {
> > struct hists *hists = evsel__hists(evsel);
> > struct hist_entry_iter iter = {
> > .evsel = evsel,
> > diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
> > index 3cd4e5a03cc5..9fabf5146d89 100644
> > --- a/tools/perf/util/symbol-elf.c
> > +++ b/tools/perf/util/symbol-elf.c
> > @@ -1723,7 +1723,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
> >
> > arch__sym_update(f, &sym);
> >
> > - __symbols__insert(dso__symbols(curr_dso), f, dso__kernel(dso));
> > + __symbols__insert(dso__symbols(curr_dso), f);
> > nr++;
> > }
> > dso__put(curr_dso);
> > diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
> > index ce9195717f44..1a357af93a0a 100644
> > --- a/tools/perf/util/symbol.c
> > +++ b/tools/perf/util/symbol.c
> > @@ -25,6 +25,8 @@
> > #include "demangle-ocaml.h"
> > #include "demangle-rust-v0.h"
> > #include "dso.h"
> > +#include "dwarf-regs.h"
> > +#include "env.h"
> > #include "util.h" // lsdir()
> > #include "event.h"
> > #include "machine.h"
> > @@ -50,7 +52,6 @@
> >
> > static int dso__load_kernel_sym(struct dso *dso, struct map *map);
> > static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
> > -static bool symbol__is_idle(const char *name);
> >
> > int vmlinux_path__nr_entries;
> > char **vmlinux_path;
> > @@ -357,8 +358,7 @@ void symbols__delete(struct rb_root_cached *symbols)
> > }
> > }
> >
> > -void __symbols__insert(struct rb_root_cached *symbols,
> > - struct symbol *sym, bool kernel)
> > +void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
> > {
> > struct rb_node **p = &symbols->rb_root.rb_node;
> > struct rb_node *parent = NULL;
> > @@ -366,17 +366,6 @@ void __symbols__insert(struct rb_root_cached *symbols,
> > struct symbol *s;
> > bool leftmost = true;
> >
> > - if (kernel) {
> > - const char *name = sym->name;
> > - /*
> > - * ppc64 uses function descriptors and appends a '.' to the
> > - * start of every instruction address. Remove it.
> > - */
> > - if (name[0] == '.')
> > - name++;
> > - sym->idle = symbol__is_idle(name);
> > - }
> > -
> > while (*p != NULL) {
> > parent = *p;
> > s = rb_entry(parent, struct symbol, rb_node);
> > @@ -393,7 +382,7 @@ void __symbols__insert(struct rb_root_cached *symbols,
> >
> > void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
> > {
> > - __symbols__insert(symbols, sym, false);
> > + __symbols__insert(symbols, sym);
> > }
> >
> > static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip)
> > @@ -554,7 +543,7 @@ void dso__reset_find_symbol_cache(struct dso *dso)
> >
> > void dso__insert_symbol(struct dso *dso, struct symbol *sym)
> > {
> > - __symbols__insert(dso__symbols(dso), sym, dso__kernel(dso));
> > + __symbols__insert(dso__symbols(dso), sym);
> >
> > /* update the symbol cache if necessary */
> > if (dso__last_find_result_addr(dso) >= sym->start &&
> > @@ -716,47 +705,87 @@ int modules__parse(const char *filename, void *arg,
> > return err;
> > }
> >
> > +static int sym_name_cmp(const void *a, const void *b)
> > +{
> > + const char *name = a;
> > + const char *const *sym = b;
> > +
> > + return strcmp(name, *sym);
> > +}
> > +
> > /*
> > * These are symbols in the kernel image, so make sure that
> > * sym is from a kernel DSO.
> > */
> > -static bool symbol__is_idle(const char *name)
> > +bool symbol__is_idle(struct symbol *sym, const struct dso *dso, const struct perf_env *env)
> > {
> > - const char * const idle_symbols[] = {
> > + static const char * const idle_symbols[] = {
> > "acpi_idle_do_entry",
> > "acpi_processor_ffh_cstate_enter",
> > "arch_cpu_idle",
> > "cpu_idle",
> > "cpu_startup_entry",
> > - "idle_cpu",
> > - "intel_idle",
> > - "intel_idle_ibrs",
> > "default_idle",
> > - "native_safe_halt",
> > "enter_idle",
> > "exit_idle",
> > - "mwait_idle",
> > - "mwait_idle_with_hints",
> > - "mwait_idle_with_hints.constprop.0",
> > + "idle_cpu",
> > + "native_safe_halt",
> > "poll_idle",
> > - "ppc64_runlatch_off",
> > "pseries_dedicated_idle_sleep",
> > - "psw_idle",
> > - "psw_idle_exit",
> > - NULL
> > };
> > - int i;
> > - static struct strlist *idle_symbols_list;
> > + const char *name = sym->name;
> > + uint16_t e_machine = env ? env->e_machine : EM_HOST;
> >
> > - if (idle_symbols_list)
> > - return strlist__has_entry(idle_symbols_list, name);
> > + if (sym->idle)
> > + return sym->idle == SYMBOL_IDLE__IDLE;
> >
> > - idle_symbols_list = strlist__new(NULL, NULL);
> > + if (!dso || dso__kernel(dso) == DSO_SPACE__USER) {
> > + sym->idle = SYMBOL_IDLE__NOT_IDLE;
> > + return false;
> > + }
> >
> > - for (i = 0; idle_symbols[i]; i++)
> > - strlist__add(idle_symbols_list, idle_symbols[i]);
> > + /*
> > + * ppc64 uses function descriptors and appends a '.' to the
> > + * start of every instruction address. Remove it.
> > + */
> > + if (name[0] == '.')
> > + name++;
> >
> > - return strlist__has_entry(idle_symbols_list, name);
> > + if (bsearch(name, idle_symbols, ARRAY_SIZE(idle_symbols),
> > + sizeof(idle_symbols[0]), sym_name_cmp)) {
> > + sym->idle = SYMBOL_IDLE__IDLE;
> > + return true;
> > + }
> > +
> > + if (e_machine == EM_386 || e_machine == EM_X86_64) {
>
> As said in anther thread, intel_idle_irq was still there on my test
> machine. I did a bit debug and found e_machine == 0 so it couldn't run
> into this branch. After dig more, it should be
> deliver_event()->perf_session__find_machine() return a struct machine
> whose env->e_machine is 0. I'm still busy today to do more, wish this
> clue can help.
I can see this, the env's e_machine isn't being lazily initialized for
the host like the arch is. I'll add a patch for this.
Thanks,
Ian
> Thanks,
> Honglei
>
> > + if (strstarts(name, "mwait_idle") ||
> > + strstarts(name, "intel_idle")) {
> > + sym->idle = SYMBOL_IDLE__IDLE;
> > + return true;
> > + }
> > + }
> > +
> > + if (e_machine == EM_PPC64 && !strcmp(name, "ppc64_runlatch_off")) {
> > + sym->idle = SYMBOL_IDLE__IDLE;
> > + return true;
> > + }
> > +
> > + if (e_machine == EM_S390) {
> > + int major = 0, minor = 0;
> > + const char *release = env && env->os_release
> > + ? env->os_release : perf_version_string;
> > +
> > + sscanf(release, "%d.%d", &major, &minor);
> > +
> > + /* Before v6.10, s390 used psw_idle. */
> > + if ((major < 6 || (major == 6 && minor < 10)) && strstarts(name, "psw_idle")) {
> > + sym->idle = SYMBOL_IDLE__IDLE;
> > + return true;
> > + }
> > + }
> > +
> > + sym->idle = SYMBOL_IDLE__NOT_IDLE;
> > + return false;
> > }
> >
> > static int map__process_kallsym_symbol(void *arg, const char *name,
> > @@ -785,7 +814,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
> > * We will pass the symbols to the filter later, in
> > * map__split_kallsyms, when we have split the maps per module
> > */
> > - __symbols__insert(root, sym, !strchr(name, '['));
> > + __symbols__insert(root, sym);
> >
> > return 0;
> > }
> > diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
> > index c67814d6d6d6..f26f67bd7982 100644
> > --- a/tools/perf/util/symbol.h
> > +++ b/tools/perf/util/symbol.h
> > @@ -25,6 +25,7 @@ struct dso;
> > struct map;
> > struct maps;
> > struct option;
> > +struct perf_env;
> > struct build_id;
> >
> > /*
> > @@ -42,6 +43,12 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
> > GElf_Shdr *shp, const char *name, size_t *idx);
> > #endif
> >
> > +enum symbol_idle_kind {
> > + SYMBOL_IDLE__UNKNOWN = 0,
> > + SYMBOL_IDLE__NOT_IDLE = 1,
> > + SYMBOL_IDLE__IDLE = 2,
> > +};
> > +
> > /**
> > * A symtab entry. When allocated this may be preceded by an annotation (see
> > * symbol__annotation) and/or a browser_index (see symbol__browser_index).
> > @@ -57,8 +64,8 @@ struct symbol {
> > u8 type:4;
> > /** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
> > u8 binding:4;
> > - /** Set true for kernel symbols of idle routines. */
> > - u8 idle:1;
> > + /** Cache for symbol__is_idle. */
> > + enum symbol_idle_kind idle:2;
> > /** Resolvable but tools ignore it (e.g. idle routines). */
> > u8 ignore:1;
> > /** Symbol for an inlined function. */
> > @@ -202,8 +209,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
> >
> > char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
> >
> > -void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
> > - bool kernel);
> > +void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
> > void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
> > void symbols__fixup_duplicate(struct rb_root_cached *symbols);
> > void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
> > @@ -286,5 +292,6 @@ enum {
> > };
> >
> > int symbol__validate_sym_arguments(void);
> > +bool symbol__is_idle(struct symbol *sym, const struct dso *dso, const struct perf_env *env);
> >
> > #endif /* __PERF_SYMBOL */
>
Add a helper to perf_env to compute the e_machine if it is
EM_NONE. Derive the value from the arch string if available. Similarly
derive the arch string from the ELF machine if available, for
consistency. This means perf's arch (machine type) is no longer
determined by uname but set to match that of the perf ELF executable.
Switch the idle computation to the point of use and lazily compute it,
rather than computing it for every symbol. The current only user is
`perf top`. At the point of use the perf_env is available and this can
be used to make sure the idle function computation is machine and
kernel version dependent.
v3: Properly set up the e_machine coming from the perf_env as reported
by Honglei Wang.
v2: Some minor white space clean up:
https://lore.kernel.org/lkml/20260325161836.1029457-1-irogers@google.com/
v1: https://lore.kernel.org/lkml/20260302234343.564937-1-irogers@google.com/
Ian Rogers (2):
perf env: Add perf_env__e_machine helper and use in perf_env__arch
perf symbol: Lazily compute idle and use the perf_env
tools/perf/builtin-top.c | 6 +-
tools/perf/util/env.c | 179 +++++++++++++++++++++++++++--------
tools/perf/util/env.h | 1 +
tools/perf/util/session.c | 14 +--
tools/perf/util/symbol-elf.c | 2 +-
tools/perf/util/symbol.c | 105 ++++++++++++--------
tools/perf/util/symbol.h | 15 ++-
7 files changed, 235 insertions(+), 87 deletions(-)
--
2.53.0.1018.g2bb0e51243-goog
Add a helper to perf_env to compute the e_machine if it is
EM_NONE. Derive the value from the arch string if available. Similarly
derive the arch string from the ELF machine if available, for
consistency. This means perf's arch (machine type) is no longer
determined by uname but set to match that of the perf ELF executable.
Switch the idle computation to the point of use and lazily compute it,
rather than computing it for every symbol. The current only user is
`perf top`. At the point of use the perf_env is available and this can
be used to make sure the idle function computation is machine and
kernel version dependent.
v4: Fix Sashiko issues where an array element wasn't sorted properly,
the e_flags weren't returned properly, the idle type is change to
a u8 rather than an enum value and the s390 version check for
psw_idle is slightly reordered and tweaked.
v3: Properly set up the e_machine coming from the perf_env as reported
by Honglei Wang.
https://lore.kernel.org/lkml/20260326174521.1829203-1-irogers@google.com/
v2: Some minor white space clean up:
https://lore.kernel.org/lkml/20260325161836.1029457-1-irogers@google.com/
v1: https://lore.kernel.org/lkml/20260302234343.564937-1-irogers@google.com/
Ian Rogers (2):
perf env: Add perf_env__e_machine helper and use in perf_env__arch
perf symbol: Lazily compute idle and use the perf_env
tools/perf/builtin-top.c | 6 +-
tools/perf/util/env.c | 185 ++++++++++++++++++++++++++++-------
tools/perf/util/env.h | 1 +
tools/perf/util/session.c | 14 +--
tools/perf/util/symbol-elf.c | 2 +-
tools/perf/util/symbol.c | 104 +++++++++++++-------
tools/perf/util/symbol.h | 15 ++-
7 files changed, 240 insertions(+), 87 deletions(-)
--
2.53.0.1018.g2bb0e51243-goog
Add a helper that lazily computes the e_machine and falls back of
EM_HOST. Use the perf_env's arch to compute the e_machine if
available. Use a binary search for some efficiency in this, but handle
somewhat complex duplicate rules. Switch perf_env__arch to be derived
the e_machine for consistency. This switches arch from being uname
derived to matching that of the perf binary (via EM_HOST). Update
session to use the helper, which may mean using EM_HOST when no
threads are available. This also updates the perf data file header
that gets the e_machine/e_flags from the session.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/util/env.c | 185 ++++++++++++++++++++++++++++++--------
tools/perf/util/env.h | 1 +
tools/perf/util/session.c | 14 +--
3 files changed, 157 insertions(+), 43 deletions(-)
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 93d475a80f14..ae08178870d7 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -1,10 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include "cpumap.h"
+#include "dwarf-regs.h"
#include "debug.h"
#include "env.h"
#include "util/header.h"
#include "util/rwsem.h"
#include <linux/compiler.h>
+#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/rbtree.h>
#include <linux/string.h>
@@ -588,51 +590,160 @@ void cpu_cache_level__free(struct cpu_cache_level *cache)
zfree(&cache->size);
}
+struct arch_to_e_machine {
+ const char *prefix;
+ uint16_t e_machine;
+};
+
/*
- * Return architecture name in a normalized form.
- * The conversion logic comes from the Makefile.
+ * A mapping from an arch prefix string to an ELF machine that can be used in a
+ * bsearch. Some arch prefixes are shared an need additional processing as
+ * marked next to the architecture. The prefixes handle both perf's architecture
+ * naming and those from uname.
*/
-static const char *normalize_arch(char *arch)
-{
- if (!strcmp(arch, "x86_64"))
- return "x86";
- if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
- return "x86";
- if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
- return "sparc";
- if (!strncmp(arch, "aarch64", 7) || !strncmp(arch, "arm64", 5))
- return "arm64";
- if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
- return "arm";
- if (!strncmp(arch, "s390", 4))
- return "s390";
- if (!strncmp(arch, "parisc", 6))
- return "parisc";
- if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
- return "powerpc";
- if (!strncmp(arch, "mips", 4))
- return "mips";
- if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
- return "sh";
- if (!strncmp(arch, "loongarch", 9))
- return "loongarch";
-
- return arch;
+static const struct arch_to_e_machine prefix_to_e_machine[] = {
+ {"aarch64", EM_AARCH64},
+ {"alpha", EM_ALPHA},
+ {"arc", EM_ARC},
+ {"arm", EM_ARM}, /* Check also for EM_AARCH64. */
+ {"avr", EM_AVR}, /* Check also for EM_AVR32. */
+ {"bfin", EM_BLACKFIN},
+ {"blackfin", EM_BLACKFIN},
+ {"cris", EM_CRIS},
+ {"csky", EM_CSKY},
+ {"hppa", EM_PARISC},
+ {"i386", EM_386},
+ {"i486", EM_386},
+ {"i586", EM_386},
+ {"i686", EM_386},
+ {"loongarch", EM_LOONGARCH},
+ {"m32r", EM_M32R},
+ {"m68k", EM_68K},
+ {"microblaze", EM_MICROBLAZE},
+ {"mips", EM_MIPS},
+ {"msp430", EM_MSP430},
+ {"parisc", EM_PARISC},
+ {"powerpc", EM_PPC}, /* Check also for EM_PPC64. */
+ {"ppc", EM_PPC}, /* Check also for EM_PPC64. */
+ {"riscv", EM_RISCV},
+ {"s390", EM_S390},
+ {"sa110", EM_ARM},
+ {"sh", EM_SH},
+ {"sparc", EM_SPARC}, /* Check also for EM_SPARCV9. */
+ {"sun4u", EM_SPARC},
+ {"x86", EM_X86_64}, /* Check also for EM_386. */
+ {"xtensa", EM_XTENSA},
+};
+
+static int compare_prefix(const void *key, const void *element)
+{
+ const char *search_key = key;
+ const struct arch_to_e_machine *map_element = element;
+ size_t prefix_len = strlen(map_element->prefix);
+
+ return strncmp(search_key, map_element->prefix, prefix_len);
+}
+
+static uint16_t perf_arch_to_e_machine(const char *perf_arch, bool is_64_bit)
+{
+ /* Binary search for a matching prefix. */
+ const struct arch_to_e_machine *result;
+
+ if (!perf_arch)
+ return EM_HOST;
+
+ result = bsearch(perf_arch,
+ prefix_to_e_machine, ARRAY_SIZE(prefix_to_e_machine),
+ sizeof(prefix_to_e_machine[0]),
+ compare_prefix);
+
+ if (!result) {
+ pr_debug("Unknown perf arch for ELF machine mapping: %s\n", perf_arch);
+ return EM_NONE;
+ }
+
+ /* Handle conflicting prefixes. */
+ switch (result->e_machine) {
+ case EM_ARM:
+ return !strcmp(perf_arch, "arm64") ? EM_AARCH64 : EM_ARM;
+ case EM_AVR:
+ return !strcmp(perf_arch, "avr32") ? EM_AVR32 : EM_AVR;
+ case EM_PPC:
+ return is_64_bit || strstarts(perf_arch, "ppc64") ? EM_PPC64 : EM_PPC;
+ case EM_SPARC:
+ return is_64_bit || !strcmp(perf_arch, "sparc64") ? EM_SPARCV9 : EM_SPARC;
+ case EM_X86_64:
+ return is_64_bit || !strcmp(perf_arch, "x86_64") ? EM_X86_64 : EM_386;
+ default:
+ return result->e_machine;
+ }
+}
+
+static const char *e_machine_to_perf_arch(uint16_t e_machine)
+{
+ /*
+ * Table for if either the perf arch string differs from uname or there
+ * are >1 ELF machine with the prefix.
+ */
+ static const struct arch_to_e_machine extras[] = {
+ {"arm64", EM_AARCH64},
+ {"avr32", EM_AVR32},
+ {"powerpc", EM_PPC},
+ {"powerpc", EM_PPC64},
+ {"sparc", EM_SPARCV9},
+ {"x86", EM_386},
+ {"x86", EM_X86_64},
+ {"none", EM_NONE},
+ };
+
+ for (size_t i = 0; i < ARRAY_SIZE(extras); i++) {
+ if (extras[i].e_machine == e_machine)
+ return extras[i].prefix;
+ }
+
+ for (size_t i = 0; i < ARRAY_SIZE(prefix_to_e_machine); i++) {
+ if (prefix_to_e_machine[i].e_machine == e_machine)
+ return prefix_to_e_machine[i].prefix;
+
+ }
+ return "unknown";
+}
+
+uint16_t perf_env__e_machine(struct perf_env *env, uint32_t *e_flags)
+{
+ if (!env) {
+ if (e_flags)
+ *e_flags = EF_HOST;
+
+ return EM_HOST;
+ }
+ if (env->e_machine == EM_NONE) {
+ env->e_machine = perf_arch_to_e_machine(env->arch, env->kernel_is_64_bit);
+
+ if (env->e_machine == EM_HOST)
+ env->e_flags = EF_HOST;
+ }
+ if (e_flags)
+ *e_flags = env->e_flags;
+
+ return env->e_machine;
}
const char *perf_env__arch(struct perf_env *env)
{
- char *arch_name;
+ if (!env)
+ return e_machine_to_perf_arch(EM_HOST);
- if (!env || !env->arch) { /* Assume local operation */
- static struct utsname uts = { .machine[0] = '\0', };
- if (uts.machine[0] == '\0' && uname(&uts) < 0)
- return NULL;
- arch_name = uts.machine;
- } else
- arch_name = env->arch;
+ if (!env->arch) {
+ /*
+ * Lazily compute/allocate arch. The e_machine may have been
+ * read from a data file and so may not be EM_HOST.
+ */
+ uint16_t e_machine = perf_env__e_machine(env, /*e_flags=*/NULL);
- return normalize_arch(arch_name);
+ env->arch = strdup(e_machine_to_perf_arch(e_machine));
+ }
+ return env->arch;
}
#if defined(HAVE_LIBTRACEEVENT)
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index a4501cbca375..91ff252712f4 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -186,6 +186,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env);
void cpu_cache_level__free(struct cpu_cache_level *cache);
+uint16_t perf_env__e_machine(struct perf_env *env, uint32_t *e_flags);
const char *perf_env__arch(struct perf_env *env);
const char *perf_env__arch_strerrno(struct perf_env *env, int err);
const char *perf_env__cpuid(struct perf_env *env);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4b465abfa36c..dcc9bef303aa 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2996,14 +2996,16 @@ uint16_t perf_session__e_machine(struct perf_session *session, uint32_t *e_flags
return EM_HOST;
}
+ /* Is the env caching an e_machine? */
env = perf_session__env(session);
- if (env && env->e_machine != EM_NONE) {
- if (e_flags)
- *e_flags = env->e_flags;
-
- return env->e_machine;
- }
+ if (env && env->e_machine != EM_NONE)
+ return perf_env__e_machine(env, e_flags);
+ /*
+ * Compute from threads, note this is more accurate than
+ * perf_env__e_machine that falls back on EM_HOST and doesn't consider
+ * mixed 32-bit and 64-bit threads.
+ */
machines__for_each_thread(&session->machines,
perf_session__e_machine_cb,
&args);
--
2.53.0.1018.g2bb0e51243-goog
Move the idle boolean to a helper symbol__is_idle function. In the
function lazily compute whether a symbol is an idle function taking
into consideration the kernel version and architecture of the
machine. As symbols__insert no longer needs to know if a symbol is for
the kernel, remove the argument.
This change is inspired by mailing list discussion, particularly from
Thomas Richter <tmricht@linux.ibm.com> and Heiko Carstens
<hca@linux.ibm.com>:
https://lore.kernel.org/lkml/20260219113850.354271-1-tmricht@linux.ibm.com/
The change switches x86 matches to use strstarts which means
intel_idle_irq is matched as part of strstarts(name, "intel_idle"), a
change suggested by Honglei Wang <jameshongleiwang@126.com> in:
https://lore.kernel.org/lkml/20260323085255.98173-1-jameshongleiwang@126.com/
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/builtin-top.c | 6 +-
tools/perf/util/symbol-elf.c | 2 +-
tools/perf/util/symbol.c | 104 ++++++++++++++++++++++-------------
tools/perf/util/symbol.h | 15 +++--
4 files changed, 83 insertions(+), 44 deletions(-)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 37950efb28ac..bdc1c761cd61 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -751,6 +751,7 @@ static void perf_event__process_sample(const struct perf_tool *tool,
{
struct perf_top *top = container_of(tool, struct perf_top, tool);
struct addr_location al;
+ struct dso *dso = NULL;
if (!machine && perf_guest) {
static struct intlist *seen;
@@ -830,7 +831,10 @@ static void perf_event__process_sample(const struct perf_tool *tool,
}
}
- if (al.sym == NULL || !al.sym->idle) {
+ if (al.map)
+ dso = map__dso(al.map);
+
+ if (al.sym == NULL || !symbol__is_idle(al.sym, dso, machine->env)) {
struct hists *hists = evsel__hists(evsel);
struct hist_entry_iter iter = {
.evsel = evsel,
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 3cd4e5a03cc5..9fabf5146d89 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1723,7 +1723,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
arch__sym_update(f, &sym);
- __symbols__insert(dso__symbols(curr_dso), f, dso__kernel(dso));
+ __symbols__insert(dso__symbols(curr_dso), f);
nr++;
}
dso__put(curr_dso);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ce9195717f44..9ff709edeb88 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -25,6 +25,8 @@
#include "demangle-ocaml.h"
#include "demangle-rust-v0.h"
#include "dso.h"
+#include "dwarf-regs.h"
+#include "env.h"
#include "util.h" // lsdir()
#include "event.h"
#include "machine.h"
@@ -50,7 +52,6 @@
static int dso__load_kernel_sym(struct dso *dso, struct map *map);
static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
-static bool symbol__is_idle(const char *name);
int vmlinux_path__nr_entries;
char **vmlinux_path;
@@ -357,8 +358,7 @@ void symbols__delete(struct rb_root_cached *symbols)
}
}
-void __symbols__insert(struct rb_root_cached *symbols,
- struct symbol *sym, bool kernel)
+void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
{
struct rb_node **p = &symbols->rb_root.rb_node;
struct rb_node *parent = NULL;
@@ -366,17 +366,6 @@ void __symbols__insert(struct rb_root_cached *symbols,
struct symbol *s;
bool leftmost = true;
- if (kernel) {
- const char *name = sym->name;
- /*
- * ppc64 uses function descriptors and appends a '.' to the
- * start of every instruction address. Remove it.
- */
- if (name[0] == '.')
- name++;
- sym->idle = symbol__is_idle(name);
- }
-
while (*p != NULL) {
parent = *p;
s = rb_entry(parent, struct symbol, rb_node);
@@ -393,7 +382,7 @@ void __symbols__insert(struct rb_root_cached *symbols,
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
{
- __symbols__insert(symbols, sym, false);
+ __symbols__insert(symbols, sym);
}
static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip)
@@ -554,7 +543,7 @@ void dso__reset_find_symbol_cache(struct dso *dso)
void dso__insert_symbol(struct dso *dso, struct symbol *sym)
{
- __symbols__insert(dso__symbols(dso), sym, dso__kernel(dso));
+ __symbols__insert(dso__symbols(dso), sym);
/* update the symbol cache if necessary */
if (dso__last_find_result_addr(dso) >= sym->start &&
@@ -716,47 +705,86 @@ int modules__parse(const char *filename, void *arg,
return err;
}
+static int sym_name_cmp(const void *a, const void *b)
+{
+ const char *name = a;
+ const char *const *sym = b;
+
+ return strcmp(name, *sym);
+}
+
/*
* These are symbols in the kernel image, so make sure that
* sym is from a kernel DSO.
*/
-static bool symbol__is_idle(const char *name)
+bool symbol__is_idle(struct symbol *sym, const struct dso *dso, struct perf_env *env)
{
- const char * const idle_symbols[] = {
+ static const char * const idle_symbols[] = {
"acpi_idle_do_entry",
"acpi_processor_ffh_cstate_enter",
"arch_cpu_idle",
"cpu_idle",
"cpu_startup_entry",
- "idle_cpu",
- "intel_idle",
- "intel_idle_ibrs",
"default_idle",
- "native_safe_halt",
"enter_idle",
"exit_idle",
- "mwait_idle",
- "mwait_idle_with_hints",
- "mwait_idle_with_hints.constprop.0",
+ "idle_cpu",
+ "native_safe_halt",
"poll_idle",
- "ppc64_runlatch_off",
"pseries_dedicated_idle_sleep",
- "psw_idle",
- "psw_idle_exit",
- NULL
};
- int i;
- static struct strlist *idle_symbols_list;
+ const char *name = sym->name;
+ uint16_t e_machine = perf_env__e_machine(env, /*e_flags=*/NULL);
+
+ if (sym->idle)
+ return sym->idle == SYMBOL_IDLE__IDLE;
+
+ if (!dso || dso__kernel(dso) == DSO_SPACE__USER) {
+ sym->idle = SYMBOL_IDLE__NOT_IDLE;
+ return false;
+ }
- if (idle_symbols_list)
- return strlist__has_entry(idle_symbols_list, name);
+ /*
+ * ppc64 uses function descriptors and appends a '.' to the
+ * start of every instruction address. Remove it.
+ */
+ if (name[0] == '.')
+ name++;
- idle_symbols_list = strlist__new(NULL, NULL);
+ if (bsearch(name, idle_symbols, ARRAY_SIZE(idle_symbols),
+ sizeof(idle_symbols[0]), sym_name_cmp)) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
- for (i = 0; idle_symbols[i]; i++)
- strlist__add(idle_symbols_list, idle_symbols[i]);
+ if (e_machine == EM_386 || e_machine == EM_X86_64) {
+ if (strstarts(name, "mwait_idle") ||
+ strstarts(name, "intel_idle")) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+ }
- return strlist__has_entry(idle_symbols_list, name);
+ if (e_machine == EM_PPC64 && !strcmp(name, "ppc64_runlatch_off")) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+
+ if (e_machine == EM_S390 && strstarts(name, "psw_idle")) {
+ int major = 0, minor = 0;
+ const char *release = env && env->os_release
+ ? env->os_release : perf_version_string;
+
+ /* Before v6.10, s390 used psw_idle. */
+ if (sscanf(release, "%d.%d", &major, &minor) != 2 ||
+ major < 6 || (major == 6 && minor < 10)) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+ }
+
+ sym->idle = SYMBOL_IDLE__NOT_IDLE;
+ return false;
}
static int map__process_kallsym_symbol(void *arg, const char *name,
@@ -785,7 +813,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
* We will pass the symbols to the filter later, in
* map__split_kallsyms, when we have split the maps per module
*/
- __symbols__insert(root, sym, !strchr(name, '['));
+ __symbols__insert(root, sym);
return 0;
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index c67814d6d6d6..2f5f90f547aa 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -25,6 +25,7 @@ struct dso;
struct map;
struct maps;
struct option;
+struct perf_env;
struct build_id;
/*
@@ -42,6 +43,12 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
GElf_Shdr *shp, const char *name, size_t *idx);
#endif
+enum symbol_idle_kind {
+ SYMBOL_IDLE__UNKNOWN = 0,
+ SYMBOL_IDLE__NOT_IDLE = 1,
+ SYMBOL_IDLE__IDLE = 2,
+};
+
/**
* A symtab entry. When allocated this may be preceded by an annotation (see
* symbol__annotation) and/or a browser_index (see symbol__browser_index).
@@ -57,8 +64,8 @@ struct symbol {
u8 type:4;
/** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
u8 binding:4;
- /** Set true for kernel symbols of idle routines. */
- u8 idle:1;
+ /** Cache for symbol__is_idle holding enum symbol_idle_kind values. */
+ u8 idle:2;
/** Resolvable but tools ignore it (e.g. idle routines). */
u8 ignore:1;
/** Symbol for an inlined function. */
@@ -202,8 +209,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
-void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
- bool kernel);
+void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__fixup_duplicate(struct rb_root_cached *symbols);
void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
@@ -286,5 +292,6 @@ enum {
};
int symbol__validate_sym_arguments(void);
+bool symbol__is_idle(struct symbol *sym, const struct dso *dso, struct perf_env *env);
#endif /* __PERF_SYMBOL */
--
2.53.0.1018.g2bb0e51243-goog
Add a helper that lazily computes the e_machine and falls back of
EM_HOST. Use the perf_env's arch to compute the e_machine if
available. Use a binary search for some efficiency in this, but handle
somewhat complex duplicate rules. Switch perf_env__arch to be derived
the e_machine for consistency. This switches arch from being uname
derived to matching that of the perf binary (via EM_HOST). Update
session to use the helper, which may mean using EM_HOST when no
threads are available. This also updates the perf data file header
that gets the e_machine/e_flags from the session.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/util/env.c | 179 ++++++++++++++++++++++++++++++--------
tools/perf/util/env.h | 1 +
tools/perf/util/session.c | 14 +--
3 files changed, 151 insertions(+), 43 deletions(-)
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 93d475a80f14..304bd8245485 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -1,10 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include "cpumap.h"
+#include "dwarf-regs.h"
#include "debug.h"
#include "env.h"
#include "util/header.h"
#include "util/rwsem.h"
#include <linux/compiler.h>
+#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/rbtree.h>
#include <linux/string.h>
@@ -588,51 +590,154 @@ void cpu_cache_level__free(struct cpu_cache_level *cache)
zfree(&cache->size);
}
+struct arch_to_e_machine {
+ const char *prefix;
+ uint16_t e_machine;
+};
+
/*
- * Return architecture name in a normalized form.
- * The conversion logic comes from the Makefile.
+ * A mapping from an arch prefix string to an ELF machine that can be used in a
+ * bsearch. Some arch prefixes are shared an need additional processing as
+ * marked next to the architecture. The prefixes handle both perf's architecture
+ * naming and those from uname.
*/
-static const char *normalize_arch(char *arch)
-{
- if (!strcmp(arch, "x86_64"))
- return "x86";
- if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
- return "x86";
- if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
- return "sparc";
- if (!strncmp(arch, "aarch64", 7) || !strncmp(arch, "arm64", 5))
- return "arm64";
- if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
- return "arm";
- if (!strncmp(arch, "s390", 4))
- return "s390";
- if (!strncmp(arch, "parisc", 6))
- return "parisc";
- if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
- return "powerpc";
- if (!strncmp(arch, "mips", 4))
- return "mips";
- if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
- return "sh";
- if (!strncmp(arch, "loongarch", 9))
- return "loongarch";
-
- return arch;
+static const struct arch_to_e_machine prefix_to_e_machine[] = {
+ {"aarch64", EM_AARCH64},
+ {"alpha", EM_ALPHA},
+ {"arc", EM_ARC},
+ {"arm", EM_ARM}, /* Check also for EM_AARCH64. */
+ {"avr", EM_AVR}, /* Check also for EM_AVR32. */
+ {"bfin", EM_BLACKFIN},
+ {"blackfin", EM_BLACKFIN},
+ {"cris", EM_CRIS},
+ {"csky", EM_CSKY},
+ {"hppa", EM_PARISC},
+ {"i386", EM_386},
+ {"i486", EM_386},
+ {"i586", EM_386},
+ {"i686", EM_386},
+ {"loongarch", EM_LOONGARCH},
+ {"m32r", EM_M32R},
+ {"m68k", EM_68K},
+ {"microblaze", EM_MICROBLAZE},
+ {"mips", EM_MIPS},
+ {"msp430", EM_MSP430},
+ {"parisc", EM_PARISC},
+ {"powerpc", EM_PPC}, /* Check also for EM_PPC64. */
+ {"ppc", EM_PPC}, /* Check also for EM_PPC64. */
+ {"riscv", EM_RISCV},
+ {"sa110", EM_ARM},
+ {"s390", EM_S390},
+ {"sh", EM_SH},
+ {"sparc", EM_SPARC}, /* Check also for EM_SPARCV9. */
+ {"sun4u", EM_SPARC},
+ {"x86", EM_X86_64}, /* Check also for EM_386. */
+ {"xtensa", EM_XTENSA},
+};
+
+static int compare_prefix(const void *key, const void *element)
+{
+ const char *search_key = key;
+ const struct arch_to_e_machine *map_element = element;
+ size_t prefix_len = strlen(map_element->prefix);
+
+ return strncmp(search_key, map_element->prefix, prefix_len);
+}
+
+static uint16_t perf_arch_to_e_machine(const char *perf_arch, bool is_64_bit)
+{
+ /* Binary search for a matching prefix. */
+ const struct arch_to_e_machine *result;
+
+ if (!perf_arch)
+ return EM_HOST;
+
+ result = bsearch(perf_arch,
+ prefix_to_e_machine, ARRAY_SIZE(prefix_to_e_machine),
+ sizeof(prefix_to_e_machine[0]),
+ compare_prefix);
+
+ if (!result) {
+ pr_debug("Unknown perf arch for ELF machine mapping: %s\n", perf_arch);
+ return EM_NONE;
+ }
+
+ /* Handle conflicting prefixes. */
+ switch (result->e_machine) {
+ case EM_ARM:
+ return !strcmp(perf_arch, "arm64") ? EM_AARCH64 : EM_ARM;
+ case EM_AVR:
+ return !strcmp(perf_arch, "avr32") ? EM_AVR32 : EM_AVR;
+ case EM_PPC:
+ return is_64_bit || strstarts(perf_arch, "ppc64") ? EM_PPC64 : EM_PPC;
+ case EM_SPARC:
+ return is_64_bit || !strcmp(perf_arch, "sparc64") ? EM_SPARCV9 : EM_SPARC;
+ case EM_X86_64:
+ return is_64_bit || !strcmp(perf_arch, "x86_64") ? EM_X86_64 : EM_386;
+ default:
+ return result->e_machine;
+ }
+}
+
+static const char *e_machine_to_perf_arch(uint16_t e_machine)
+{
+ /*
+ * Table for if either the perf arch string differs from uname or there
+ * are >1 ELF machine with the prefix.
+ */
+ static const struct arch_to_e_machine extras[] = {
+ {"arm64", EM_AARCH64},
+ {"avr32", EM_AVR32},
+ {"powerpc", EM_PPC},
+ {"powerpc", EM_PPC64},
+ {"sparc", EM_SPARCV9},
+ {"x86", EM_386},
+ {"x86", EM_X86_64},
+ {"none", EM_NONE},
+ };
+
+ for (size_t i = 0; i < ARRAY_SIZE(extras); i++) {
+ if (extras[i].e_machine == e_machine)
+ return extras[i].prefix;
+ }
+
+ for (size_t i = 0; i < ARRAY_SIZE(prefix_to_e_machine); i++) {
+ if (prefix_to_e_machine[i].e_machine == e_machine)
+ return prefix_to_e_machine[i].prefix;
+
+ }
+ return "unknown";
+}
+
+uint16_t perf_env__e_machine(struct perf_env *env, uint32_t *e_flags)
+{
+ if (!env) {
+ if (e_flags)
+ *e_flags = EF_HOST;
+
+ return EM_HOST;
+ }
+ if (env->e_machine == EM_NONE) {
+ env->e_machine = perf_arch_to_e_machine(env->arch, env->kernel_is_64_bit);
+
+ if (env->e_machine == EM_HOST)
+ env->e_flags = EF_HOST;
+ }
+ if (e_flags)
+ *e_flags = EF_HOST;
+
+ return env->e_machine;
}
const char *perf_env__arch(struct perf_env *env)
{
- char *arch_name;
+ if (!env)
+ return e_machine_to_perf_arch(EM_HOST);
- if (!env || !env->arch) { /* Assume local operation */
- static struct utsname uts = { .machine[0] = '\0', };
- if (uts.machine[0] == '\0' && uname(&uts) < 0)
- return NULL;
- arch_name = uts.machine;
- } else
- arch_name = env->arch;
+ if (!env->arch)
+ env->arch = strdup(e_machine_to_perf_arch(perf_env__e_machine(env, /*e_flags=*/NULL)));
- return normalize_arch(arch_name);
+ return env->arch;
}
#if defined(HAVE_LIBTRACEEVENT)
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index a4501cbca375..91ff252712f4 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -186,6 +186,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env);
void cpu_cache_level__free(struct cpu_cache_level *cache);
+uint16_t perf_env__e_machine(struct perf_env *env, uint32_t *e_flags);
const char *perf_env__arch(struct perf_env *env);
const char *perf_env__arch_strerrno(struct perf_env *env, int err);
const char *perf_env__cpuid(struct perf_env *env);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4b465abfa36c..dcc9bef303aa 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2996,14 +2996,16 @@ uint16_t perf_session__e_machine(struct perf_session *session, uint32_t *e_flags
return EM_HOST;
}
+ /* Is the env caching an e_machine? */
env = perf_session__env(session);
- if (env && env->e_machine != EM_NONE) {
- if (e_flags)
- *e_flags = env->e_flags;
-
- return env->e_machine;
- }
+ if (env && env->e_machine != EM_NONE)
+ return perf_env__e_machine(env, e_flags);
+ /*
+ * Compute from threads, note this is more accurate than
+ * perf_env__e_machine that falls back on EM_HOST and doesn't consider
+ * mixed 32-bit and 64-bit threads.
+ */
machines__for_each_thread(&session->machines,
perf_session__e_machine_cb,
&args);
--
2.53.0.1018.g2bb0e51243-goog
Move the idle boolean to a helper symbol__is_idle function. In the
function lazily compute whether a symbol is an idle function taking
into consideration the kernel version and architecture of the
machine. As symbols__insert no longer needs to know if a symbol is for
the kernel, remove the argument.
This change is inspired by mailing list discussion, particularly from
Thomas Richter <tmricht@linux.ibm.com> and Heiko Carstens
<hca@linux.ibm.com>:
https://lore.kernel.org/lkml/20260219113850.354271-1-tmricht@linux.ibm.com/
The change switches x86 matches to use strstarts which means
intel_idle_irq is matched as part of strstarts(name, "intel_idle"), a
change suggested by Honglei Wang <jameshongleiwang@126.com> in:
https://lore.kernel.org/lkml/20260323085255.98173-1-jameshongleiwang@126.com/
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/builtin-top.c | 6 +-
tools/perf/util/symbol-elf.c | 2 +-
tools/perf/util/symbol.c | 105 ++++++++++++++++++++++-------------
tools/perf/util/symbol.h | 15 +++--
4 files changed, 84 insertions(+), 44 deletions(-)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 37950efb28ac..bdc1c761cd61 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -751,6 +751,7 @@ static void perf_event__process_sample(const struct perf_tool *tool,
{
struct perf_top *top = container_of(tool, struct perf_top, tool);
struct addr_location al;
+ struct dso *dso = NULL;
if (!machine && perf_guest) {
static struct intlist *seen;
@@ -830,7 +831,10 @@ static void perf_event__process_sample(const struct perf_tool *tool,
}
}
- if (al.sym == NULL || !al.sym->idle) {
+ if (al.map)
+ dso = map__dso(al.map);
+
+ if (al.sym == NULL || !symbol__is_idle(al.sym, dso, machine->env)) {
struct hists *hists = evsel__hists(evsel);
struct hist_entry_iter iter = {
.evsel = evsel,
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 3cd4e5a03cc5..9fabf5146d89 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1723,7 +1723,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
arch__sym_update(f, &sym);
- __symbols__insert(dso__symbols(curr_dso), f, dso__kernel(dso));
+ __symbols__insert(dso__symbols(curr_dso), f);
nr++;
}
dso__put(curr_dso);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ce9195717f44..92bc28934f36 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -25,6 +25,8 @@
#include "demangle-ocaml.h"
#include "demangle-rust-v0.h"
#include "dso.h"
+#include "dwarf-regs.h"
+#include "env.h"
#include "util.h" // lsdir()
#include "event.h"
#include "machine.h"
@@ -50,7 +52,6 @@
static int dso__load_kernel_sym(struct dso *dso, struct map *map);
static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
-static bool symbol__is_idle(const char *name);
int vmlinux_path__nr_entries;
char **vmlinux_path;
@@ -357,8 +358,7 @@ void symbols__delete(struct rb_root_cached *symbols)
}
}
-void __symbols__insert(struct rb_root_cached *symbols,
- struct symbol *sym, bool kernel)
+void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
{
struct rb_node **p = &symbols->rb_root.rb_node;
struct rb_node *parent = NULL;
@@ -366,17 +366,6 @@ void __symbols__insert(struct rb_root_cached *symbols,
struct symbol *s;
bool leftmost = true;
- if (kernel) {
- const char *name = sym->name;
- /*
- * ppc64 uses function descriptors and appends a '.' to the
- * start of every instruction address. Remove it.
- */
- if (name[0] == '.')
- name++;
- sym->idle = symbol__is_idle(name);
- }
-
while (*p != NULL) {
parent = *p;
s = rb_entry(parent, struct symbol, rb_node);
@@ -393,7 +382,7 @@ void __symbols__insert(struct rb_root_cached *symbols,
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
{
- __symbols__insert(symbols, sym, false);
+ __symbols__insert(symbols, sym);
}
static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip)
@@ -554,7 +543,7 @@ void dso__reset_find_symbol_cache(struct dso *dso)
void dso__insert_symbol(struct dso *dso, struct symbol *sym)
{
- __symbols__insert(dso__symbols(dso), sym, dso__kernel(dso));
+ __symbols__insert(dso__symbols(dso), sym);
/* update the symbol cache if necessary */
if (dso__last_find_result_addr(dso) >= sym->start &&
@@ -716,47 +705,87 @@ int modules__parse(const char *filename, void *arg,
return err;
}
+static int sym_name_cmp(const void *a, const void *b)
+{
+ const char *name = a;
+ const char *const *sym = b;
+
+ return strcmp(name, *sym);
+}
+
/*
* These are symbols in the kernel image, so make sure that
* sym is from a kernel DSO.
*/
-static bool symbol__is_idle(const char *name)
+bool symbol__is_idle(struct symbol *sym, const struct dso *dso, struct perf_env *env)
{
- const char * const idle_symbols[] = {
+ static const char * const idle_symbols[] = {
"acpi_idle_do_entry",
"acpi_processor_ffh_cstate_enter",
"arch_cpu_idle",
"cpu_idle",
"cpu_startup_entry",
- "idle_cpu",
- "intel_idle",
- "intel_idle_ibrs",
"default_idle",
- "native_safe_halt",
"enter_idle",
"exit_idle",
- "mwait_idle",
- "mwait_idle_with_hints",
- "mwait_idle_with_hints.constprop.0",
+ "idle_cpu",
+ "native_safe_halt",
"poll_idle",
- "ppc64_runlatch_off",
"pseries_dedicated_idle_sleep",
- "psw_idle",
- "psw_idle_exit",
- NULL
};
- int i;
- static struct strlist *idle_symbols_list;
+ const char *name = sym->name;
+ uint16_t e_machine = perf_env__e_machine(env, /*e_flags=*/NULL);
- if (idle_symbols_list)
- return strlist__has_entry(idle_symbols_list, name);
+ if (sym->idle)
+ return sym->idle == SYMBOL_IDLE__IDLE;
- idle_symbols_list = strlist__new(NULL, NULL);
+ if (!dso || dso__kernel(dso) == DSO_SPACE__USER) {
+ sym->idle = SYMBOL_IDLE__NOT_IDLE;
+ return false;
+ }
- for (i = 0; idle_symbols[i]; i++)
- strlist__add(idle_symbols_list, idle_symbols[i]);
+ /*
+ * ppc64 uses function descriptors and appends a '.' to the
+ * start of every instruction address. Remove it.
+ */
+ if (name[0] == '.')
+ name++;
- return strlist__has_entry(idle_symbols_list, name);
+ if (bsearch(name, idle_symbols, ARRAY_SIZE(idle_symbols),
+ sizeof(idle_symbols[0]), sym_name_cmp)) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+
+ if (e_machine == EM_386 || e_machine == EM_X86_64) {
+ if (strstarts(name, "mwait_idle") ||
+ strstarts(name, "intel_idle")) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+ }
+
+ if (e_machine == EM_PPC64 && !strcmp(name, "ppc64_runlatch_off")) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+
+ if (e_machine == EM_S390) {
+ int major = 0, minor = 0;
+ const char *release = env && env->os_release
+ ? env->os_release : perf_version_string;
+
+ sscanf(release, "%d.%d", &major, &minor);
+
+ /* Before v6.10, s390 used psw_idle. */
+ if ((major < 6 || (major == 6 && minor < 10)) && strstarts(name, "psw_idle")) {
+ sym->idle = SYMBOL_IDLE__IDLE;
+ return true;
+ }
+ }
+
+ sym->idle = SYMBOL_IDLE__NOT_IDLE;
+ return false;
}
static int map__process_kallsym_symbol(void *arg, const char *name,
@@ -785,7 +814,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
* We will pass the symbols to the filter later, in
* map__split_kallsyms, when we have split the maps per module
*/
- __symbols__insert(root, sym, !strchr(name, '['));
+ __symbols__insert(root, sym);
return 0;
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index c67814d6d6d6..65422c1c8fdb 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -25,6 +25,7 @@ struct dso;
struct map;
struct maps;
struct option;
+struct perf_env;
struct build_id;
/*
@@ -42,6 +43,12 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
GElf_Shdr *shp, const char *name, size_t *idx);
#endif
+enum symbol_idle_kind {
+ SYMBOL_IDLE__UNKNOWN = 0,
+ SYMBOL_IDLE__NOT_IDLE = 1,
+ SYMBOL_IDLE__IDLE = 2,
+};
+
/**
* A symtab entry. When allocated this may be preceded by an annotation (see
* symbol__annotation) and/or a browser_index (see symbol__browser_index).
@@ -57,8 +64,8 @@ struct symbol {
u8 type:4;
/** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
u8 binding:4;
- /** Set true for kernel symbols of idle routines. */
- u8 idle:1;
+ /** Cache for symbol__is_idle. */
+ enum symbol_idle_kind idle:2;
/** Resolvable but tools ignore it (e.g. idle routines). */
u8 ignore:1;
/** Symbol for an inlined function. */
@@ -202,8 +209,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
-void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
- bool kernel);
+void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__fixup_duplicate(struct rb_root_cached *symbols);
void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
@@ -286,5 +292,6 @@ enum {
};
int symbol__validate_sym_arguments(void);
+bool symbol__is_idle(struct symbol *sym, const struct dso *dso, struct perf_env *env);
#endif /* __PERF_SYMBOL */
--
2.53.0.1018.g2bb0e51243-goog
Hi Ian,
FYI. It works on my icx machine with 'perf top'.
Thanks,
Honglei
On 3/27/26 1:45 AM, Ian Rogers wrote:
> Move the idle boolean to a helper symbol__is_idle function. In the
> function lazily compute whether a symbol is an idle function taking
> into consideration the kernel version and architecture of the
> machine. As symbols__insert no longer needs to know if a symbol is for
> the kernel, remove the argument.
>
> This change is inspired by mailing list discussion, particularly from
> Thomas Richter <tmricht@linux.ibm.com> and Heiko Carstens
> <hca@linux.ibm.com>:
> https://lore.kernel.org/lkml/20260219113850.354271-1-tmricht@linux.ibm.com/
>
> The change switches x86 matches to use strstarts which means
> intel_idle_irq is matched as part of strstarts(name, "intel_idle"), a
> change suggested by Honglei Wang <jameshongleiwang@126.com> in:
> https://lore.kernel.org/lkml/20260323085255.98173-1-jameshongleiwang@126.com/
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/builtin-top.c | 6 +-
> tools/perf/util/symbol-elf.c | 2 +-
> tools/perf/util/symbol.c | 105 ++++++++++++++++++++++-------------
> tools/perf/util/symbol.h | 15 +++--
> 4 files changed, 84 insertions(+), 44 deletions(-)
>
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 37950efb28ac..bdc1c761cd61 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -751,6 +751,7 @@ static void perf_event__process_sample(const struct perf_tool *tool,
> {
> struct perf_top *top = container_of(tool, struct perf_top, tool);
> struct addr_location al;
> + struct dso *dso = NULL;
>
> if (!machine && perf_guest) {
> static struct intlist *seen;
> @@ -830,7 +831,10 @@ static void perf_event__process_sample(const struct perf_tool *tool,
> }
> }
>
> - if (al.sym == NULL || !al.sym->idle) {
> + if (al.map)
> + dso = map__dso(al.map);
> +
> + if (al.sym == NULL || !symbol__is_idle(al.sym, dso, machine->env)) {
> struct hists *hists = evsel__hists(evsel);
> struct hist_entry_iter iter = {
> .evsel = evsel,
> diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
> index 3cd4e5a03cc5..9fabf5146d89 100644
> --- a/tools/perf/util/symbol-elf.c
> +++ b/tools/perf/util/symbol-elf.c
> @@ -1723,7 +1723,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
>
> arch__sym_update(f, &sym);
>
> - __symbols__insert(dso__symbols(curr_dso), f, dso__kernel(dso));
> + __symbols__insert(dso__symbols(curr_dso), f);
> nr++;
> }
> dso__put(curr_dso);
> diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
> index ce9195717f44..92bc28934f36 100644
> --- a/tools/perf/util/symbol.c
> +++ b/tools/perf/util/symbol.c
> @@ -25,6 +25,8 @@
> #include "demangle-ocaml.h"
> #include "demangle-rust-v0.h"
> #include "dso.h"
> +#include "dwarf-regs.h"
> +#include "env.h"
> #include "util.h" // lsdir()
> #include "event.h"
> #include "machine.h"
> @@ -50,7 +52,6 @@
>
> static int dso__load_kernel_sym(struct dso *dso, struct map *map);
> static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
> -static bool symbol__is_idle(const char *name);
>
> int vmlinux_path__nr_entries;
> char **vmlinux_path;
> @@ -357,8 +358,7 @@ void symbols__delete(struct rb_root_cached *symbols)
> }
> }
>
> -void __symbols__insert(struct rb_root_cached *symbols,
> - struct symbol *sym, bool kernel)
> +void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
> {
> struct rb_node **p = &symbols->rb_root.rb_node;
> struct rb_node *parent = NULL;
> @@ -366,17 +366,6 @@ void __symbols__insert(struct rb_root_cached *symbols,
> struct symbol *s;
> bool leftmost = true;
>
> - if (kernel) {
> - const char *name = sym->name;
> - /*
> - * ppc64 uses function descriptors and appends a '.' to the
> - * start of every instruction address. Remove it.
> - */
> - if (name[0] == '.')
> - name++;
> - sym->idle = symbol__is_idle(name);
> - }
> -
> while (*p != NULL) {
> parent = *p;
> s = rb_entry(parent, struct symbol, rb_node);
> @@ -393,7 +382,7 @@ void __symbols__insert(struct rb_root_cached *symbols,
>
> void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
> {
> - __symbols__insert(symbols, sym, false);
> + __symbols__insert(symbols, sym);
> }
>
> static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip)
> @@ -554,7 +543,7 @@ void dso__reset_find_symbol_cache(struct dso *dso)
>
> void dso__insert_symbol(struct dso *dso, struct symbol *sym)
> {
> - __symbols__insert(dso__symbols(dso), sym, dso__kernel(dso));
> + __symbols__insert(dso__symbols(dso), sym);
>
> /* update the symbol cache if necessary */
> if (dso__last_find_result_addr(dso) >= sym->start &&
> @@ -716,47 +705,87 @@ int modules__parse(const char *filename, void *arg,
> return err;
> }
>
> +static int sym_name_cmp(const void *a, const void *b)
> +{
> + const char *name = a;
> + const char *const *sym = b;
> +
> + return strcmp(name, *sym);
> +}
> +
> /*
> * These are symbols in the kernel image, so make sure that
> * sym is from a kernel DSO.
> */
> -static bool symbol__is_idle(const char *name)
> +bool symbol__is_idle(struct symbol *sym, const struct dso *dso, struct perf_env *env)
> {
> - const char * const idle_symbols[] = {
> + static const char * const idle_symbols[] = {
> "acpi_idle_do_entry",
> "acpi_processor_ffh_cstate_enter",
> "arch_cpu_idle",
> "cpu_idle",
> "cpu_startup_entry",
> - "idle_cpu",
> - "intel_idle",
> - "intel_idle_ibrs",
> "default_idle",
> - "native_safe_halt",
> "enter_idle",
> "exit_idle",
> - "mwait_idle",
> - "mwait_idle_with_hints",
> - "mwait_idle_with_hints.constprop.0",
> + "idle_cpu",
> + "native_safe_halt",
> "poll_idle",
> - "ppc64_runlatch_off",
> "pseries_dedicated_idle_sleep",
> - "psw_idle",
> - "psw_idle_exit",
> - NULL
> };
> - int i;
> - static struct strlist *idle_symbols_list;
> + const char *name = sym->name;
> + uint16_t e_machine = perf_env__e_machine(env, /*e_flags=*/NULL);
>
> - if (idle_symbols_list)
> - return strlist__has_entry(idle_symbols_list, name);
> + if (sym->idle)
> + return sym->idle == SYMBOL_IDLE__IDLE;
>
> - idle_symbols_list = strlist__new(NULL, NULL);
> + if (!dso || dso__kernel(dso) == DSO_SPACE__USER) {
> + sym->idle = SYMBOL_IDLE__NOT_IDLE;
> + return false;
> + }
>
> - for (i = 0; idle_symbols[i]; i++)
> - strlist__add(idle_symbols_list, idle_symbols[i]);
> + /*
> + * ppc64 uses function descriptors and appends a '.' to the
> + * start of every instruction address. Remove it.
> + */
> + if (name[0] == '.')
> + name++;
>
> - return strlist__has_entry(idle_symbols_list, name);
> + if (bsearch(name, idle_symbols, ARRAY_SIZE(idle_symbols),
> + sizeof(idle_symbols[0]), sym_name_cmp)) {
> + sym->idle = SYMBOL_IDLE__IDLE;
> + return true;
> + }
> +
> + if (e_machine == EM_386 || e_machine == EM_X86_64) {
> + if (strstarts(name, "mwait_idle") ||
> + strstarts(name, "intel_idle")) {
> + sym->idle = SYMBOL_IDLE__IDLE;
> + return true;
> + }
> + }
> +
> + if (e_machine == EM_PPC64 && !strcmp(name, "ppc64_runlatch_off")) {
> + sym->idle = SYMBOL_IDLE__IDLE;
> + return true;
> + }
> +
> + if (e_machine == EM_S390) {
> + int major = 0, minor = 0;
> + const char *release = env && env->os_release
> + ? env->os_release : perf_version_string;
> +
> + sscanf(release, "%d.%d", &major, &minor);
> +
> + /* Before v6.10, s390 used psw_idle. */
> + if ((major < 6 || (major == 6 && minor < 10)) && strstarts(name, "psw_idle")) {
> + sym->idle = SYMBOL_IDLE__IDLE;
> + return true;
> + }
> + }
> +
> + sym->idle = SYMBOL_IDLE__NOT_IDLE;
> + return false;
> }
>
> static int map__process_kallsym_symbol(void *arg, const char *name,
> @@ -785,7 +814,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
> * We will pass the symbols to the filter later, in
> * map__split_kallsyms, when we have split the maps per module
> */
> - __symbols__insert(root, sym, !strchr(name, '['));
> + __symbols__insert(root, sym);
>
> return 0;
> }
> diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
> index c67814d6d6d6..65422c1c8fdb 100644
> --- a/tools/perf/util/symbol.h
> +++ b/tools/perf/util/symbol.h
> @@ -25,6 +25,7 @@ struct dso;
> struct map;
> struct maps;
> struct option;
> +struct perf_env;
> struct build_id;
>
> /*
> @@ -42,6 +43,12 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
> GElf_Shdr *shp, const char *name, size_t *idx);
> #endif
>
> +enum symbol_idle_kind {
> + SYMBOL_IDLE__UNKNOWN = 0,
> + SYMBOL_IDLE__NOT_IDLE = 1,
> + SYMBOL_IDLE__IDLE = 2,
> +};
> +
> /**
> * A symtab entry. When allocated this may be preceded by an annotation (see
> * symbol__annotation) and/or a browser_index (see symbol__browser_index).
> @@ -57,8 +64,8 @@ struct symbol {
> u8 type:4;
> /** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
> u8 binding:4;
> - /** Set true for kernel symbols of idle routines. */
> - u8 idle:1;
> + /** Cache for symbol__is_idle. */
> + enum symbol_idle_kind idle:2;
> /** Resolvable but tools ignore it (e.g. idle routines). */
> u8 ignore:1;
> /** Symbol for an inlined function. */
> @@ -202,8 +209,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
>
> char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
>
> -void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
> - bool kernel);
> +void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
> void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
> void symbols__fixup_duplicate(struct rb_root_cached *symbols);
> void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
> @@ -286,5 +292,6 @@ enum {
> };
>
> int symbol__validate_sym_arguments(void);
> +bool symbol__is_idle(struct symbol *sym, const struct dso *dso, struct perf_env *env);
>
> #endif /* __PERF_SYMBOL */
Writing to the test output files in the current working directory can
fail in various contexts such as continual test. Other tests write to
a mktemp-ed file, make the "perf script task-analyszer tests" follow
this convention too. Currently this isn't possible for the perf.data
file due to a lack of perf script support, add a variable for when
this support is available.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/tests/shell/test_task_analyzer.sh | 38 +++++++++++---------
1 file changed, 21 insertions(+), 17 deletions(-)
diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh
index e194fcf61df3..b1a6a7e017e4 100755
--- a/tools/perf/tests/shell/test_task_analyzer.sh
+++ b/tools/perf/tests/shell/test_task_analyzer.sh
@@ -3,6 +3,11 @@
# SPDX-License-Identifier: GPL-2.0
tmpdir=$(mktemp -d /tmp/perf-script-task-analyzer-XXXXX)
+# TODO: perf script report only supports input from the CWD perf.data file, make
+# it support input from any file.
+perfdata="perf.data"
+csv="$tmpdir/csv"
+csvsummary="$tmpdir/csvsummary"
err=0
# set PERF_EXEC_PATH to find scripts in the source directory
@@ -15,11 +20,10 @@ fi
export ASAN_OPTIONS=detect_leaks=0
cleanup() {
- rm -f perf.data
- rm -f perf.data.old
- rm -f csv
- rm -f csvsummary
+ rm -f "${perfdata}"
+ rm -f "${perfdata}".old
rm -rf "$tmpdir"
+
trap - exit term int
}
@@ -61,7 +65,7 @@ skip_no_probe_record_support() {
prepare_perf_data() {
# 1s should be sufficient to catch at least some switches
- perf record -e sched:sched_switch -a -- sleep 1 > /dev/null 2>&1
+ perf record -e sched:sched_switch -a -o "${perfdata}" -- sleep 1 > /dev/null 2>&1
# check if perf data file got created in above step.
if [ ! -e "perf.data" ]; then
printf "FAIL: perf record failed to create \"perf.data\" \n"
@@ -130,28 +134,28 @@ test_extended_times_summary_ns() {
}
test_csv() {
- perf script report task-analyzer --csv csv > /dev/null
- check_exec_0 "perf script report task-analyzer --csv csv"
- find_str_or_fail "Comm;" csv "${FUNCNAME[0]}"
+ perf script report task-analyzer --csv "${csv}" > /dev/null
+ check_exec_0 "perf script report task-analyzer --csv ${csv}"
+ find_str_or_fail "Comm;" "${csv}" "${FUNCNAME[0]}"
}
test_csv_extended_times() {
- perf script report task-analyzer --csv csv --extended-times > /dev/null
- check_exec_0 "perf script report task-analyzer --csv csv --extended-times"
- find_str_or_fail "Out-Out;" csv "${FUNCNAME[0]}"
+ perf script report task-analyzer --csv "${csv}" --extended-times > /dev/null
+ check_exec_0 "perf script report task-analyzer --csv ${csv} --extended-times"
+ find_str_or_fail "Out-Out;" "${csv}" "${FUNCNAME[0]}"
}
test_csvsummary() {
- perf script report task-analyzer --csv-summary csvsummary > /dev/null
- check_exec_0 "perf script report task-analyzer --csv-summary csvsummary"
- find_str_or_fail "Comm;" csvsummary "${FUNCNAME[0]}"
+ perf script report task-analyzer --csv-summary "${csvsummary}" > /dev/null
+ check_exec_0 "perf script report task-analyzer --csv-summary ${csvsummary}"
+ find_str_or_fail "Comm;" "${csvsummary}" "${FUNCNAME[0]}"
}
test_csvsummary_extended() {
- perf script report task-analyzer --csv-summary csvsummary --summary-extended \
+ perf script report task-analyzer --csv-summary "${csvsummary}" --summary-extended \
>/dev/null
- check_exec_0 "perf script report task-analyzer --csv-summary csvsummary --summary-extended"
- find_str_or_fail "Out-Out;" csvsummary "${FUNCNAME[0]}"
+ check_exec_0 "perf script report task-analyzer --csv-summary ${csvsummary} --summary-extended"
+ find_str_or_fail "Out-Out;" "${csvsummary}" "${FUNCNAME[0]}"
}
skip_no_probe_record_support
--
2.53.0.1018.g2bb0e51243-goog
I'm curious why this patch is in the idle symbol thread.
On Thu, Mar 26, 2026 at 11:00:33PM -0700, Ian Rogers wrote:
> Writing to the test output files in the current working directory can
> fail in various contexts such as continual test. Other tests write to
> a mktemp-ed file, make the "perf script task-analyszer tests" follow
> this convention too. Currently this isn't possible for the perf.data
> file due to a lack of perf script support, add a variable for when
> this support is available.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/tests/shell/test_task_analyzer.sh | 38 +++++++++++---------
> 1 file changed, 21 insertions(+), 17 deletions(-)
>
> diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh
> index e194fcf61df3..b1a6a7e017e4 100755
> --- a/tools/perf/tests/shell/test_task_analyzer.sh
> +++ b/tools/perf/tests/shell/test_task_analyzer.sh
> @@ -3,6 +3,11 @@
> # SPDX-License-Identifier: GPL-2.0
>
> tmpdir=$(mktemp -d /tmp/perf-script-task-analyzer-XXXXX)
> +# TODO: perf script report only supports input from the CWD perf.data file, make
> +# it support input from any file.
> +perfdata="perf.data"
> +csv="$tmpdir/csv"
> +csvsummary="$tmpdir/csvsummary"
> err=0
>
> # set PERF_EXEC_PATH to find scripts in the source directory
> @@ -15,11 +20,10 @@ fi
> export ASAN_OPTIONS=detect_leaks=0
>
> cleanup() {
> - rm -f perf.data
> - rm -f perf.data.old
> - rm -f csv
> - rm -f csvsummary
> + rm -f "${perfdata}"
> + rm -f "${perfdata}".old
> rm -rf "$tmpdir"
> +
> trap - exit term int
> }
>
> @@ -61,7 +65,7 @@ skip_no_probe_record_support() {
>
> prepare_perf_data() {
> # 1s should be sufficient to catch at least some switches
> - perf record -e sched:sched_switch -a -- sleep 1 > /dev/null 2>&1
> + perf record -e sched:sched_switch -a -o "${perfdata}" -- sleep 1 > /dev/null 2>&1
> # check if perf data file got created in above step.
> if [ ! -e "perf.data" ]; then
> printf "FAIL: perf record failed to create \"perf.data\" \n"
Please update this part too.
Thanks,
Namhyung
> @@ -130,28 +134,28 @@ test_extended_times_summary_ns() {
> }
>
> test_csv() {
> - perf script report task-analyzer --csv csv > /dev/null
> - check_exec_0 "perf script report task-analyzer --csv csv"
> - find_str_or_fail "Comm;" csv "${FUNCNAME[0]}"
> + perf script report task-analyzer --csv "${csv}" > /dev/null
> + check_exec_0 "perf script report task-analyzer --csv ${csv}"
> + find_str_or_fail "Comm;" "${csv}" "${FUNCNAME[0]}"
> }
>
> test_csv_extended_times() {
> - perf script report task-analyzer --csv csv --extended-times > /dev/null
> - check_exec_0 "perf script report task-analyzer --csv csv --extended-times"
> - find_str_or_fail "Out-Out;" csv "${FUNCNAME[0]}"
> + perf script report task-analyzer --csv "${csv}" --extended-times > /dev/null
> + check_exec_0 "perf script report task-analyzer --csv ${csv} --extended-times"
> + find_str_or_fail "Out-Out;" "${csv}" "${FUNCNAME[0]}"
> }
>
> test_csvsummary() {
> - perf script report task-analyzer --csv-summary csvsummary > /dev/null
> - check_exec_0 "perf script report task-analyzer --csv-summary csvsummary"
> - find_str_or_fail "Comm;" csvsummary "${FUNCNAME[0]}"
> + perf script report task-analyzer --csv-summary "${csvsummary}" > /dev/null
> + check_exec_0 "perf script report task-analyzer --csv-summary ${csvsummary}"
> + find_str_or_fail "Comm;" "${csvsummary}" "${FUNCNAME[0]}"
> }
>
> test_csvsummary_extended() {
> - perf script report task-analyzer --csv-summary csvsummary --summary-extended \
> + perf script report task-analyzer --csv-summary "${csvsummary}" --summary-extended \
> >/dev/null
> - check_exec_0 "perf script report task-analyzer --csv-summary csvsummary --summary-extended"
> - find_str_or_fail "Out-Out;" csvsummary "${FUNCNAME[0]}"
> + check_exec_0 "perf script report task-analyzer --csv-summary ${csvsummary} --summary-extended"
> + find_str_or_fail "Out-Out;" "${csvsummary}" "${FUNCNAME[0]}"
> }
>
> skip_no_probe_record_support
> --
> 2.53.0.1018.g2bb0e51243-goog
>
On Tue, Mar 31, 2026 at 12:22 AM Namhyung Kim <namhyung@kernel.org> wrote:
>
> I'm curious why this patch is in the idle symbol thread.
I'll separate it, I was gathering fixes. Same branch has the BPF
counters test fix in it:
https://lore.kernel.org/lkml/20260325171653.1091337-1-irogers@google.com/
> On Thu, Mar 26, 2026 at 11:00:33PM -0700, Ian Rogers wrote:
> > Writing to the test output files in the current working directory can
> > fail in various contexts such as continual test. Other tests write to
> > a mktemp-ed file, make the "perf script task-analyszer tests" follow
> > this convention too. Currently this isn't possible for the perf.data
> > file due to a lack of perf script support, add a variable for when
> > this support is available.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > tools/perf/tests/shell/test_task_analyzer.sh | 38 +++++++++++---------
> > 1 file changed, 21 insertions(+), 17 deletions(-)
> >
> > diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh
> > index e194fcf61df3..b1a6a7e017e4 100755
> > --- a/tools/perf/tests/shell/test_task_analyzer.sh
> > +++ b/tools/perf/tests/shell/test_task_analyzer.sh
> > @@ -3,6 +3,11 @@
> > # SPDX-License-Identifier: GPL-2.0
> >
> > tmpdir=$(mktemp -d /tmp/perf-script-task-analyzer-XXXXX)
> > +# TODO: perf script report only supports input from the CWD perf.data file, make
> > +# it support input from any file.
> > +perfdata="perf.data"
> > +csv="$tmpdir/csv"
> > +csvsummary="$tmpdir/csvsummary"
> > err=0
> >
> > # set PERF_EXEC_PATH to find scripts in the source directory
> > @@ -15,11 +20,10 @@ fi
> > export ASAN_OPTIONS=detect_leaks=0
> >
> > cleanup() {
> > - rm -f perf.data
> > - rm -f perf.data.old
> > - rm -f csv
> > - rm -f csvsummary
> > + rm -f "${perfdata}"
> > + rm -f "${perfdata}".old
> > rm -rf "$tmpdir"
> > +
> > trap - exit term int
> > }
> >
> > @@ -61,7 +65,7 @@ skip_no_probe_record_support() {
> >
> > prepare_perf_data() {
> > # 1s should be sufficient to catch at least some switches
> > - perf record -e sched:sched_switch -a -- sleep 1 > /dev/null 2>&1
> > + perf record -e sched:sched_switch -a -o "${perfdata}" -- sleep 1 > /dev/null 2>&1
> > # check if perf data file got created in above step.
> > if [ ! -e "perf.data" ]; then
> > printf "FAIL: perf record failed to create \"perf.data\" \n"
>
> Please update this part too.
Done.
Thanks,
Ian
> Thanks,
> Namhyung
>
>
> > @@ -130,28 +134,28 @@ test_extended_times_summary_ns() {
> > }
> >
> > test_csv() {
> > - perf script report task-analyzer --csv csv > /dev/null
> > - check_exec_0 "perf script report task-analyzer --csv csv"
> > - find_str_or_fail "Comm;" csv "${FUNCNAME[0]}"
> > + perf script report task-analyzer --csv "${csv}" > /dev/null
> > + check_exec_0 "perf script report task-analyzer --csv ${csv}"
> > + find_str_or_fail "Comm;" "${csv}" "${FUNCNAME[0]}"
> > }
> >
> > test_csv_extended_times() {
> > - perf script report task-analyzer --csv csv --extended-times > /dev/null
> > - check_exec_0 "perf script report task-analyzer --csv csv --extended-times"
> > - find_str_or_fail "Out-Out;" csv "${FUNCNAME[0]}"
> > + perf script report task-analyzer --csv "${csv}" --extended-times > /dev/null
> > + check_exec_0 "perf script report task-analyzer --csv ${csv} --extended-times"
> > + find_str_or_fail "Out-Out;" "${csv}" "${FUNCNAME[0]}"
> > }
> >
> > test_csvsummary() {
> > - perf script report task-analyzer --csv-summary csvsummary > /dev/null
> > - check_exec_0 "perf script report task-analyzer --csv-summary csvsummary"
> > - find_str_or_fail "Comm;" csvsummary "${FUNCNAME[0]}"
> > + perf script report task-analyzer --csv-summary "${csvsummary}" > /dev/null
> > + check_exec_0 "perf script report task-analyzer --csv-summary ${csvsummary}"
> > + find_str_or_fail "Comm;" "${csvsummary}" "${FUNCNAME[0]}"
> > }
> >
> > test_csvsummary_extended() {
> > - perf script report task-analyzer --csv-summary csvsummary --summary-extended \
> > + perf script report task-analyzer --csv-summary "${csvsummary}" --summary-extended \
> > >/dev/null
> > - check_exec_0 "perf script report task-analyzer --csv-summary csvsummary --summary-extended"
> > - find_str_or_fail "Out-Out;" csvsummary "${FUNCNAME[0]}"
> > + check_exec_0 "perf script report task-analyzer --csv-summary ${csvsummary} --summary-extended"
> > + find_str_or_fail "Out-Out;" "${csvsummary}" "${FUNCNAME[0]}"
> > }
> >
> > skip_no_probe_record_support
> > --
> > 2.53.0.1018.g2bb0e51243-goog
> >
On Tue, Mar 31, 2026 at 10:58:55AM -0700, Ian Rogers wrote: > On Tue, Mar 31, 2026 at 12:22 AM Namhyung Kim <namhyung@kernel.org> wrote: > > > > I'm curious why this patch is in the idle symbol thread. > > I'll separate it, I was gathering fixes. Same branch has the BPF > counters test fix in it: > https://lore.kernel.org/lkml/20260325171653.1091337-1-irogers@google.com/ Ok, I'll test and process it. Thanks, Namhyung
© 2016 - 2026 Red Hat, Inc.