... | ... | ||
---|---|---|---|
19 | to a more consistent cpu_cycles and avoided the problem. With these | 19 | to a more consistent cpu_cycles and avoided the problem. With these |
20 | changes the problematic event will now be skipped, a large warning | 20 | changes the problematic event will now be skipped, a large warning |
21 | produced, and perf record will continue for the other PMU events. This | 21 | produced, and perf record will continue for the other PMU events. This |
22 | solution was proposed by Arnaldo. | 22 | solution was proposed by Arnaldo. |
23 | 23 | ||
24 | Two minor changes have been added to help with the error message and | 24 | v6: Rebase of v5 (dropping already merged patches): |
25 | to work around issues occurring with "perf stat metrics (shadow stat) | 25 | https://lore.kernel.org/lkml/20250109222109.567031-1-irogers@google.com/ |
26 | test". | 26 | that unusually had an RFC posted for it: |
27 | 27 | https://lore.kernel.org/lkml/Z7Z5kv75BMML2A1q@google.com/ | |
28 | The patches have only been tested on my x86 non-hybrid laptop. | 28 | Note, this patch conflicts/contradicts: |
29 | https://lore.kernel.org/lkml/20250312211623.2495798-1-irogers@google.com/ | ||
30 | that I posted so that we could either consistently prioritize | ||
31 | sysfs/json (these patches) or legacy events (the other | ||
32 | patches). That lack of event printing and encoding inconsistency | ||
33 | is most prominent in the encoding of events like "instructions" | ||
34 | which on hybrid are reported as "cpu_core/instructions/" but | ||
35 | "instructions" before these patches gets a legacy encoding while | ||
36 | "cpu_core/instructions/" gets a sysfs/json encoding. These patches | ||
37 | make "instructions" always get a sysfs/json encoding while the | ||
38 | alternate patches make it always get a legacy encoding. | ||
29 | 39 | ||
30 | v5: Follow Namhyung's suggestion and ignore the case where command | 40 | v5: Follow Namhyung's suggestion and ignore the case where command |
31 | line dummy events fail to open alongside other events that all | 41 | line dummy events fail to open alongside other events that all |
32 | fail to open. Note, the Tested-by tags are left on the series as | 42 | fail to open. Note, the Tested-by tags are left on the series as |
33 | v4 and v5 were changing an error case that doesn't occur in | 43 | v4 and v5 were changing an error case that doesn't occur in |
... | ... | ||
57 | 67 | ||
58 | v2: Rebase and add tested-by tags from James Clark, Leo Yan and Atish | 68 | v2: Rebase and add tested-by tags from James Clark, Leo Yan and Atish |
59 | Patra who have tested on RISC-V and ARM CPUs, including the | 69 | Patra who have tested on RISC-V and ARM CPUs, including the |
60 | problem case from before. | 70 | problem case from before. |
61 | 71 | ||
62 | Ian Rogers (4): | 72 | Ian Rogers (2): |
63 | perf evsel: Add pmu_name helper | ||
64 | perf stat: Fix find_stat for mixed legacy/non-legacy events | ||
65 | perf record: Skip don't fail for events that don't open | 73 | perf record: Skip don't fail for events that don't open |
66 | perf parse-events: Reapply "Prefer sysfs/JSON hardware events over | 74 | perf parse-events: Reapply "Prefer sysfs/JSON hardware events over |
67 | legacy" | 75 | legacy" |
68 | 76 | ||
69 | tools/perf/builtin-record.c | 47 ++++++++++++++++++--- | 77 | tools/perf/builtin-record.c | 47 ++++++++++++++++++--- |
70 | tools/perf/util/evsel.c | 10 +++++ | ||
71 | tools/perf/util/evsel.h | 1 + | ||
72 | tools/perf/util/parse-events.c | 26 +++++++++--- | 78 | tools/perf/util/parse-events.c | 26 +++++++++--- |
73 | tools/perf/util/parse-events.l | 76 +++++++++++++++++----------------- | 79 | tools/perf/util/parse-events.l | 76 +++++++++++++++++----------------- |
74 | tools/perf/util/parse-events.y | 60 ++++++++++++++++++--------- | 80 | tools/perf/util/parse-events.y | 60 ++++++++++++++++++--------- |
75 | tools/perf/util/pmus.c | 20 +++++++-- | 81 | 4 files changed, 139 insertions(+), 70 deletions(-) |
76 | tools/perf/util/stat-shadow.c | 3 +- | ||
77 | 8 files changed, 169 insertions(+), 74 deletions(-) | ||
78 | 82 | ||
79 | -- | 83 | -- |
80 | 2.47.1.613.gc27f4b7a9f-goog | 84 | 2.49.0.395.g12beb8f557-goog | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Add helper to get the name of the evsel's PMU. This handles the case | ||
2 | where there's no sysfs PMU via parse_events event_type helper. | ||
3 | 1 | ||
4 | Signed-off-by: Ian Rogers <irogers@google.com> | ||
5 | Tested-by: James Clark <james.clark@linaro.org> | ||
6 | Tested-by: Leo Yan <leo.yan@arm.com> | ||
7 | Tested-by: Atish Patra <atishp@rivosinc.com> | ||
8 | --- | ||
9 | tools/perf/util/evsel.c | 10 ++++++++++ | ||
10 | tools/perf/util/evsel.h | 1 + | ||
11 | 2 files changed, 11 insertions(+) | ||
12 | |||
13 | diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tools/perf/util/evsel.c | ||
16 | +++ b/tools/perf/util/evsel.c | ||
17 | @@ -XXX,XX +XXX,XX @@ int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel), | ||
18 | return 0; | ||
19 | } | ||
20 | |||
21 | +const char *evsel__pmu_name(const struct evsel *evsel) | ||
22 | +{ | ||
23 | + struct perf_pmu *pmu = evsel__find_pmu(evsel); | ||
24 | + | ||
25 | + if (pmu) | ||
26 | + return pmu->name; | ||
27 | + | ||
28 | + return event_type(evsel->core.attr.type); | ||
29 | +} | ||
30 | + | ||
31 | #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) | ||
32 | |||
33 | int __evsel__sample_size(u64 sample_type) | ||
34 | diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/tools/perf/util/evsel.h | ||
37 | +++ b/tools/perf/util/evsel.h | ||
38 | @@ -XXX,XX +XXX,XX @@ int evsel__object_config(size_t object_size, | ||
39 | void (*fini)(struct evsel *evsel)); | ||
40 | |||
41 | struct perf_pmu *evsel__find_pmu(const struct evsel *evsel); | ||
42 | +const char *evsel__pmu_name(const struct evsel *evsel); | ||
43 | bool evsel__is_aux_event(const struct evsel *evsel); | ||
44 | |||
45 | struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx); | ||
46 | -- | ||
47 | 2.47.1.613.gc27f4b7a9f-goog | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Legacy events typically don't have a PMU when added leading to | ||
2 | mismatched legacy/non-legacy cases in find_stat. Use evsel__find_pmu | ||
3 | to make sure the evsel PMU is looked up. Update the evsel__find_pmu | ||
4 | code to look for the PMU using the extended config type or, for legacy | ||
5 | hardware/hw_cache events on non-hybrid systems, just use the core PMU. | ||
6 | 1 | ||
7 | Before: | ||
8 | ``` | ||
9 | $ perf stat -e cycles,cpu/instructions/ -a sleep 1 | ||
10 | Performance counter stats for 'system wide': | ||
11 | |||
12 | 215,309,764 cycles | ||
13 | 44,326,491 cpu/instructions/ | ||
14 | |||
15 | 1.002555314 seconds time elapsed | ||
16 | ``` | ||
17 | After: | ||
18 | ``` | ||
19 | $ perf stat -e cycles,cpu/instructions/ -a sleep 1 | ||
20 | |||
21 | Performance counter stats for 'system wide': | ||
22 | |||
23 | 990,676,332 cycles | ||
24 | 1,235,762,487 cpu/instructions/ # 1.25 insn per cycle | ||
25 | |||
26 | 1.002667198 seconds time elapsed | ||
27 | ``` | ||
28 | |||
29 | Fixes: 3612ca8e2935 ("perf stat: Fix the hard-coded metrics | ||
30 | calculation on the hybrid") | ||
31 | Signed-off-by: Ian Rogers <irogers@google.com> | ||
32 | Tested-by: James Clark <james.clark@linaro.org> | ||
33 | Tested-by: Leo Yan <leo.yan@arm.com> | ||
34 | Tested-by: Atish Patra <atishp@rivosinc.com> | ||
35 | --- | ||
36 | tools/perf/util/pmus.c | 20 +++++++++++++++++--- | ||
37 | tools/perf/util/stat-shadow.c | 3 ++- | ||
38 | 2 files changed, 19 insertions(+), 4 deletions(-) | ||
39 | |||
40 | diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/tools/perf/util/pmus.c | ||
43 | +++ b/tools/perf/util/pmus.c | ||
44 | @@ -XXX,XX +XXX,XX @@ char *perf_pmus__default_pmu_name(void) | ||
45 | struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) | ||
46 | { | ||
47 | struct perf_pmu *pmu = evsel->pmu; | ||
48 | + bool legacy_core_type; | ||
49 | |||
50 | - if (!pmu) { | ||
51 | - pmu = perf_pmus__find_by_type(evsel->core.attr.type); | ||
52 | - ((struct evsel *)evsel)->pmu = pmu; | ||
53 | + if (pmu) | ||
54 | + return pmu; | ||
55 | + | ||
56 | + pmu = perf_pmus__find_by_type(evsel->core.attr.type); | ||
57 | + legacy_core_type = | ||
58 | + evsel->core.attr.type == PERF_TYPE_HARDWARE || | ||
59 | + evsel->core.attr.type == PERF_TYPE_HW_CACHE; | ||
60 | + if (!pmu && legacy_core_type) { | ||
61 | + if (perf_pmus__supports_extended_type()) { | ||
62 | + u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; | ||
63 | + | ||
64 | + pmu = perf_pmus__find_by_type(type); | ||
65 | + } else { | ||
66 | + pmu = perf_pmus__find_core_pmu(); | ||
67 | + } | ||
68 | } | ||
69 | + ((struct evsel *)evsel)->pmu = pmu; | ||
70 | return pmu; | ||
71 | } | ||
72 | |||
73 | diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c | ||
74 | index XXXXXXX..XXXXXXX 100644 | ||
75 | --- a/tools/perf/util/stat-shadow.c | ||
76 | +++ b/tools/perf/util/stat-shadow.c | ||
77 | @@ -XXX,XX +XXX,XX @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type | ||
78 | { | ||
79 | struct evsel *cur; | ||
80 | int evsel_ctx = evsel_context(evsel); | ||
81 | + struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel); | ||
82 | |||
83 | evlist__for_each_entry(evsel->evlist, cur) { | ||
84 | struct perf_stat_aggr *aggr; | ||
85 | @@ -XXX,XX +XXX,XX @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type | ||
86 | * Except the SW CLOCK events, | ||
87 | * ignore if not the PMU we're looking for. | ||
88 | */ | ||
89 | - if ((type != STAT_NSECS) && (evsel->pmu != cur->pmu)) | ||
90 | + if ((type != STAT_NSECS) && (evsel_pmu != evsel__find_pmu(cur))) | ||
91 | continue; | ||
92 | |||
93 | aggr = &cur->stats->aggr[aggr_idx]; | ||
94 | -- | ||
95 | 2.47.1.613.gc27f4b7a9f-goog | diff view generated by jsdifflib |
... | ... | ||
---|---|---|---|
188 | + } | 188 | + } |
189 | if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { | 189 | if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { |
190 | pr_warning( | 190 | pr_warning( |
191 | "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" | 191 | "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" |
192 | -- | 192 | -- |
193 | 2.47.1.613.gc27f4b7a9f-goog | 193 | 2.49.0.395.g12beb8f557-goog | diff view generated by jsdifflib |
... | ... | ||
---|---|---|---|
359 | + /*head_config=*/NULL, /*wildcard=*/false); | 359 | + /*head_config=*/NULL, /*wildcard=*/false); |
360 | if (err) | 360 | if (err) |
361 | PE_ABORT(err); | 361 | PE_ABORT(err); |
362 | $$ = list; | 362 | $$ = list; |
363 | -- | 363 | -- |
364 | 2.47.1.613.gc27f4b7a9f-goog | 364 | 2.49.0.395.g12beb8f557-goog | diff view generated by jsdifflib |