... | ... | ||
---|---|---|---|
19 | to a more consistent cpu_cycles and avoided the problem. With these | 19 | to a more consistent cpu_cycles and avoided the problem. With these |
20 | changes the problematic event will now be skipped, a large warning | 20 | changes the problematic event will now be skipped, a large warning |
21 | produced, and perf record will continue for the other PMU events. This | 21 | produced, and perf record will continue for the other PMU events. This |
22 | solution was proposed by Arnaldo. | 22 | solution was proposed by Arnaldo. |
23 | 23 | ||
24 | Two minor changes have been added to help with the error message and | 24 | v6: Rebase of v5 (dropping already merged patches): |
25 | to work around issues occurring with "perf stat metrics (shadow stat) | 25 | https://lore.kernel.org/lkml/20250109222109.567031-1-irogers@google.com/ |
26 | test". | 26 | that unusually had an RFC posted for it: |
27 | https://lore.kernel.org/lkml/Z7Z5kv75BMML2A1q@google.com/ | ||
28 | Note, this patch conflicts/contradicts: | ||
29 | https://lore.kernel.org/lkml/20250312211623.2495798-1-irogers@google.com/ | ||
30 | that I posted so that we could either consistently prioritize | ||
31 | sysfs/json (these patches) or legacy events (the other | ||
32 | patches). That lack of event printing and encoding inconsistency | ||
33 | is most prominent in the encoding of events like "instructions" | ||
34 | which on hybrid are reported as "cpu_core/instructions/" but | ||
35 | "instructions" before these patches gets a legacy encoding while | ||
36 | "cpu_core/instructions/" gets a sysfs/json encoding. These patches | ||
37 | make "instructions" always get a sysfs/json encoding while the | ||
38 | alternate patches make it always get a legacy encoding. | ||
27 | 39 | ||
28 | The patches have only been tested on my x86 non-hybrid laptop. | 40 | v5: Follow Namhyung's suggestion and ignore the case where command |
41 | line dummy events fail to open alongside other events that all | ||
42 | fail to open. Note, the Tested-by tags are left on the series as | ||
43 | v4 and v5 were changing an error case that doesn't occur in | ||
44 | testing but was manually tested by myself. | ||
29 | 45 | ||
30 | v4: Rework the no events opening change from v3 to make it handle | 46 | v4: Rework the no events opening change from v3 to make it handle |
31 | multiple dummy events. Sadly an evlist isn't empty if it just | 47 | multiple dummy events. Sadly an evlist isn't empty if it just |
32 | contains dummy events as the dummy event may be used with "perf | 48 | contains dummy events as the dummy event may be used with "perf |
33 | record -e dummy .." as a way to determine whether permission | 49 | record -e dummy .." as a way to determine whether permission |
... | ... | ||
46 | passes my manual testing. | 62 | passes my manual testing. |
47 | 63 | ||
48 | v3: Make no events opening for perf record a failure as suggested by | 64 | v3: Make no events opening for perf record a failure as suggested by |
49 | James Clark and Aditya Bodkhe <Aditya.Bodkhe1@ibm.com>. Also, | 65 | James Clark and Aditya Bodkhe <Aditya.Bodkhe1@ibm.com>. Also, |
50 | rebase. | 66 | rebase. |
67 | |||
51 | v2: Rebase and add tested-by tags from James Clark, Leo Yan and Atish | 68 | v2: Rebase and add tested-by tags from James Clark, Leo Yan and Atish |
52 | Patra who have tested on RISC-V and ARM CPUs, including the | 69 | Patra who have tested on RISC-V and ARM CPUs, including the |
53 | problem case from before. | 70 | problem case from before. |
54 | 71 | ||
55 | Ian Rogers (4): | 72 | Ian Rogers (2): |
56 | perf evsel: Add pmu_name helper | ||
57 | perf stat: Fix find_stat for mixed legacy/non-legacy events | ||
58 | perf record: Skip don't fail for events that don't open | 73 | perf record: Skip don't fail for events that don't open |
59 | perf parse-events: Reapply "Prefer sysfs/JSON hardware events over | 74 | perf parse-events: Reapply "Prefer sysfs/JSON hardware events over |
60 | legacy" | 75 | legacy" |
61 | 76 | ||
62 | tools/perf/builtin-record.c | 54 +++++++++++++++++++++--- | 77 | tools/perf/builtin-record.c | 47 ++++++++++++++++++--- |
63 | tools/perf/util/evsel.c | 10 +++++ | ||
64 | tools/perf/util/evsel.h | 1 + | ||
65 | tools/perf/util/parse-events.c | 26 +++++++++--- | 78 | tools/perf/util/parse-events.c | 26 +++++++++--- |
66 | tools/perf/util/parse-events.l | 76 +++++++++++++++++----------------- | 79 | tools/perf/util/parse-events.l | 76 +++++++++++++++++----------------- |
67 | tools/perf/util/parse-events.y | 60 ++++++++++++++++++--------- | 80 | tools/perf/util/parse-events.y | 60 ++++++++++++++++++--------- |
68 | tools/perf/util/pmus.c | 20 +++++++-- | 81 | 4 files changed, 139 insertions(+), 70 deletions(-) |
69 | tools/perf/util/stat-shadow.c | 3 +- | ||
70 | 8 files changed, 176 insertions(+), 74 deletions(-) | ||
71 | 82 | ||
72 | -- | 83 | -- |
73 | 2.47.1.613.gc27f4b7a9f-goog | 84 | 2.49.0.395.g12beb8f557-goog | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Add helper to get the name of the evsel's PMU. This handles the case | ||
2 | where there's no sysfs PMU via parse_events event_type helper. | ||
3 | 1 | ||
4 | Signed-off-by: Ian Rogers <irogers@google.com> | ||
5 | Tested-by: James Clark <james.clark@linaro.org> | ||
6 | Tested-by: Leo Yan <leo.yan@arm.com> | ||
7 | Tested-by: Atish Patra <atishp@rivosinc.com> | ||
8 | --- | ||
9 | tools/perf/util/evsel.c | 10 ++++++++++ | ||
10 | tools/perf/util/evsel.h | 1 + | ||
11 | 2 files changed, 11 insertions(+) | ||
12 | |||
13 | diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tools/perf/util/evsel.c | ||
16 | +++ b/tools/perf/util/evsel.c | ||
17 | @@ -XXX,XX +XXX,XX @@ int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel), | ||
18 | return 0; | ||
19 | } | ||
20 | |||
21 | +const char *evsel__pmu_name(const struct evsel *evsel) | ||
22 | +{ | ||
23 | + struct perf_pmu *pmu = evsel__find_pmu(evsel); | ||
24 | + | ||
25 | + if (pmu) | ||
26 | + return pmu->name; | ||
27 | + | ||
28 | + return event_type(evsel->core.attr.type); | ||
29 | +} | ||
30 | + | ||
31 | #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) | ||
32 | |||
33 | int __evsel__sample_size(u64 sample_type) | ||
34 | diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/tools/perf/util/evsel.h | ||
37 | +++ b/tools/perf/util/evsel.h | ||
38 | @@ -XXX,XX +XXX,XX @@ int evsel__object_config(size_t object_size, | ||
39 | void (*fini)(struct evsel *evsel)); | ||
40 | |||
41 | struct perf_pmu *evsel__find_pmu(const struct evsel *evsel); | ||
42 | +const char *evsel__pmu_name(const struct evsel *evsel); | ||
43 | bool evsel__is_aux_event(const struct evsel *evsel); | ||
44 | |||
45 | struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx); | ||
46 | -- | ||
47 | 2.47.1.613.gc27f4b7a9f-goog | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Legacy events typically don't have a PMU when added leading to | ||
2 | mismatched legacy/non-legacy cases in find_stat. Use evsel__find_pmu | ||
3 | to make sure the evsel PMU is looked up. Update the evsel__find_pmu | ||
4 | code to look for the PMU using the extended config type or, for legacy | ||
5 | hardware/hw_cache events on non-hybrid systems, just use the core PMU. | ||
6 | 1 | ||
7 | Before: | ||
8 | ``` | ||
9 | $ perf stat -e cycles,cpu/instructions/ -a sleep 1 | ||
10 | Performance counter stats for 'system wide': | ||
11 | |||
12 | 215,309,764 cycles | ||
13 | 44,326,491 cpu/instructions/ | ||
14 | |||
15 | 1.002555314 seconds time elapsed | ||
16 | ``` | ||
17 | After: | ||
18 | ``` | ||
19 | $ perf stat -e cycles,cpu/instructions/ -a sleep 1 | ||
20 | |||
21 | Performance counter stats for 'system wide': | ||
22 | |||
23 | 990,676,332 cycles | ||
24 | 1,235,762,487 cpu/instructions/ # 1.25 insn per cycle | ||
25 | |||
26 | 1.002667198 seconds time elapsed | ||
27 | ``` | ||
28 | |||
29 | Fixes: 3612ca8e2935 ("perf stat: Fix the hard-coded metrics | ||
30 | calculation on the hybrid") | ||
31 | Signed-off-by: Ian Rogers <irogers@google.com> | ||
32 | Tested-by: James Clark <james.clark@linaro.org> | ||
33 | Tested-by: Leo Yan <leo.yan@arm.com> | ||
34 | Tested-by: Atish Patra <atishp@rivosinc.com> | ||
35 | --- | ||
36 | tools/perf/util/pmus.c | 20 +++++++++++++++++--- | ||
37 | tools/perf/util/stat-shadow.c | 3 ++- | ||
38 | 2 files changed, 19 insertions(+), 4 deletions(-) | ||
39 | |||
40 | diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/tools/perf/util/pmus.c | ||
43 | +++ b/tools/perf/util/pmus.c | ||
44 | @@ -XXX,XX +XXX,XX @@ char *perf_pmus__default_pmu_name(void) | ||
45 | struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) | ||
46 | { | ||
47 | struct perf_pmu *pmu = evsel->pmu; | ||
48 | + bool legacy_core_type; | ||
49 | |||
50 | - if (!pmu) { | ||
51 | - pmu = perf_pmus__find_by_type(evsel->core.attr.type); | ||
52 | - ((struct evsel *)evsel)->pmu = pmu; | ||
53 | + if (pmu) | ||
54 | + return pmu; | ||
55 | + | ||
56 | + pmu = perf_pmus__find_by_type(evsel->core.attr.type); | ||
57 | + legacy_core_type = | ||
58 | + evsel->core.attr.type == PERF_TYPE_HARDWARE || | ||
59 | + evsel->core.attr.type == PERF_TYPE_HW_CACHE; | ||
60 | + if (!pmu && legacy_core_type) { | ||
61 | + if (perf_pmus__supports_extended_type()) { | ||
62 | + u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; | ||
63 | + | ||
64 | + pmu = perf_pmus__find_by_type(type); | ||
65 | + } else { | ||
66 | + pmu = perf_pmus__find_core_pmu(); | ||
67 | + } | ||
68 | } | ||
69 | + ((struct evsel *)evsel)->pmu = pmu; | ||
70 | return pmu; | ||
71 | } | ||
72 | |||
73 | diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c | ||
74 | index XXXXXXX..XXXXXXX 100644 | ||
75 | --- a/tools/perf/util/stat-shadow.c | ||
76 | +++ b/tools/perf/util/stat-shadow.c | ||
77 | @@ -XXX,XX +XXX,XX @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type | ||
78 | { | ||
79 | struct evsel *cur; | ||
80 | int evsel_ctx = evsel_context(evsel); | ||
81 | + struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel); | ||
82 | |||
83 | evlist__for_each_entry(evsel->evlist, cur) { | ||
84 | struct perf_stat_aggr *aggr; | ||
85 | @@ -XXX,XX +XXX,XX @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type | ||
86 | * Except the SW CLOCK events, | ||
87 | * ignore if not the PMU we're looking for. | ||
88 | */ | ||
89 | - if ((type != STAT_NSECS) && (evsel->pmu != cur->pmu)) | ||
90 | + if ((type != STAT_NSECS) && (evsel_pmu != evsel__find_pmu(cur))) | ||
91 | continue; | ||
92 | |||
93 | aggr = &cur->stats->aggr[aggr_idx]; | ||
94 | -- | ||
95 | 2.47.1.613.gc27f4b7a9f-goog | diff view generated by jsdifflib |
... | ... | ||
---|---|---|---|
60 | The LLC-prefetch-read event is not supported. | 60 | The LLC-prefetch-read event is not supported. |
61 | Error: | 61 | Error: |
62 | Failure to open any events for recording | 62 | Failure to open any events for recording |
63 | ``` | 63 | ``` |
64 | 64 | ||
65 | This is done by detecting if dummy events were implicitly added by | 65 | As an evlist may have dummy events that open when all command line |
66 | perf and seeing if the evlist is empty without them. This allows the | 66 | events fail we ignore dummy events when detecting if at least some |
67 | dummy event still to be recorded: | 67 | events open. This still permits the dummy event on its own to be used |
68 | as a permission check: | ||
68 | ``` | 69 | ``` |
69 | $ perf record -e dummy true | 70 | $ perf record -e dummy true |
70 | [ perf record: Woken up 1 times to write data ] | 71 | [ perf record: Woken up 1 times to write data ] |
71 | [ perf record: Captured and wrote 0.046 MB perf.data ] | 72 | [ perf record: Captured and wrote 0.046 MB perf.data ] |
72 | ``` | 73 | ``` |
73 | but fail when inserted: | 74 | but allows failure when a dummy event is implicilty inserted or when |
75 | there are insufficient permissions to open it: | ||
74 | ``` | 76 | ``` |
75 | $ perf record -e LLC-prefetch-read -a true | 77 | $ perf record -e LLC-prefetch-read -a true |
76 | Error: | 78 | Error: |
77 | Failure to open event 'LLC-prefetch-read' on PMU 'cpu' which will be removed. | 79 | Failure to open event 'LLC-prefetch-read' on PMU 'cpu' which will be removed. |
78 | The LLC-prefetch-read event is not supported. | 80 | The LLC-prefetch-read event is not supported. |
... | ... | ||
106 | Signed-off-by: Ian Rogers <irogers@google.com> | 108 | Signed-off-by: Ian Rogers <irogers@google.com> |
107 | Tested-by: James Clark <james.clark@linaro.org> | 109 | Tested-by: James Clark <james.clark@linaro.org> |
108 | Tested-by: Leo Yan <leo.yan@arm.com> | 110 | Tested-by: Leo Yan <leo.yan@arm.com> |
109 | Tested-by: Atish Patra <atishp@rivosinc.com> | 111 | Tested-by: Atish Patra <atishp@rivosinc.com> |
110 | --- | 112 | --- |
111 | tools/perf/builtin-record.c | 54 ++++++++++++++++++++++++++++++++----- | 113 | tools/perf/builtin-record.c | 47 ++++++++++++++++++++++++++++++++----- |
112 | 1 file changed, 48 insertions(+), 6 deletions(-) | 114 | 1 file changed, 41 insertions(+), 6 deletions(-) |
113 | 115 | ||
114 | diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c | 116 | diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c |
115 | index XXXXXXX..XXXXXXX 100644 | 117 | index XXXXXXX..XXXXXXX 100644 |
116 | --- a/tools/perf/builtin-record.c | 118 | --- a/tools/perf/builtin-record.c |
117 | +++ b/tools/perf/builtin-record.c | 119 | +++ b/tools/perf/builtin-record.c |
118 | @@ -XXX,XX +XXX,XX @@ struct record { | ||
119 | struct evlist *sb_evlist; | ||
120 | pthread_t thread_id; | ||
121 | int realtime_prio; | ||
122 | + int num_parsed_dummy_events; | ||
123 | bool switch_output_event_set; | ||
124 | bool no_buildid; | ||
125 | bool no_buildid_set; | ||
126 | @@ -XXX,XX +XXX,XX @@ static int record__config_tracking_events(struct record *rec) | 120 | @@ -XXX,XX +XXX,XX @@ static int record__config_tracking_events(struct record *rec) |
127 | */ | 121 | */ |
128 | if (opts->target.initial_delay || target__has_cpu(&opts->target) || | 122 | if (opts->target.initial_delay || target__has_cpu(&opts->target) || |
129 | perf_pmus__num_core_pmus() > 1) { | 123 | perf_pmus__num_core_pmus() > 1) { |
130 | - | 124 | - |
... | ... | ||
158 | - pos->supported = true; | 152 | - pos->supported = true; |
159 | } | 153 | } |
160 | 154 | ||
161 | + if (skipped) { | 155 | + if (skipped) { |
162 | + struct evsel *tmp; | 156 | + struct evsel *tmp; |
163 | + int idx = 0, num_dummy = 0, num_non_dummy = 0, | 157 | + int idx = 0; |
164 | + removed_dummy = 0, removed_non_dummy = 0; | 158 | + bool evlist_empty = true; |
165 | + | 159 | + |
166 | + /* Remove evsels that failed to open and update indices. */ | 160 | + /* Remove evsels that failed to open and update indices. */ |
167 | + evlist__for_each_entry_safe(evlist, tmp, pos) { | 161 | + evlist__for_each_entry_safe(evlist, tmp, pos) { |
168 | + if (evsel__is_dummy_event(pos)) | 162 | + if (pos->skippable) { |
169 | + num_dummy++; | 163 | + evlist__remove(evlist, pos); |
170 | + else | 164 | + continue; |
171 | + num_non_dummy++; | 165 | + } |
172 | + | 166 | + |
173 | + if (!pos->skippable) | 167 | + /* |
174 | + continue; | 168 | + * Note, dummy events may be command line parsed or |
175 | + | 169 | + * added by the tool. We care about supporting `perf |
176 | + if (evsel__is_dummy_event(pos)) | 170 | + * record -e dummy` which may be used as a permission |
177 | + removed_dummy++; | 171 | + * check. Dummy events that are added to the command |
178 | + else | 172 | + * line and opened along with other events that fail, |
179 | + removed_non_dummy++; | 173 | + * will still fail as if the dummy events were tool |
180 | + | 174 | + * added events for the sake of code simplicity. |
181 | + evlist__remove(evlist, pos); | 175 | + */ |
176 | + if (!evsel__is_dummy_event(pos)) | ||
177 | + evlist_empty = false; | ||
182 | + } | 178 | + } |
183 | + evlist__for_each_entry(evlist, pos) { | 179 | + evlist__for_each_entry(evlist, pos) { |
184 | + pos->core.idx = idx++; | 180 | + pos->core.idx = idx++; |
185 | + } | 181 | + } |
186 | + /* If list is empty except implicitly added dummy events then fail. */ | 182 | + /* If list is empty then fail. */ |
187 | + if ((num_non_dummy == removed_non_dummy) && | 183 | + if (evlist_empty) { |
188 | + ((rec->num_parsed_dummy_events == 0) || | ||
189 | + (removed_dummy >= (num_dummy - rec->num_parsed_dummy_events)))) { | ||
190 | + ui__error("Failure to open any events for recording.\n"); | 184 | + ui__error("Failure to open any events for recording.\n"); |
191 | + rc = -1; | 185 | + rc = -1; |
192 | + goto out; | 186 | + goto out; |
193 | + } | 187 | + } |
194 | + } | 188 | + } |
195 | if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { | 189 | if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { |
196 | pr_warning( | 190 | pr_warning( |
197 | "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" | 191 | "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" |
198 | @@ -XXX,XX +XXX,XX @@ int cmd_record(int argc, const char **argv) | ||
199 | int err; | ||
200 | struct record *rec = &record; | ||
201 | char errbuf[BUFSIZ]; | ||
202 | + struct evsel *evsel; | ||
203 | |||
204 | setlocale(LC_ALL, ""); | ||
205 | |||
206 | @@ -XXX,XX +XXX,XX @@ int cmd_record(int argc, const char **argv) | ||
207 | if (quiet) | ||
208 | perf_quiet_option(); | ||
209 | |||
210 | + evlist__for_each_entry(rec->evlist, evsel) { | ||
211 | + if (evsel__is_dummy_event(evsel)) | ||
212 | + rec->num_parsed_dummy_events++; | ||
213 | + } | ||
214 | + | ||
215 | err = symbol__validate_sym_arguments(); | ||
216 | if (err) | ||
217 | return err; | ||
218 | -- | 192 | -- |
219 | 2.47.1.613.gc27f4b7a9f-goog | 193 | 2.49.0.395.g12beb8f557-goog | diff view generated by jsdifflib |
... | ... | ||
---|---|---|---|
359 | + /*head_config=*/NULL, /*wildcard=*/false); | 359 | + /*head_config=*/NULL, /*wildcard=*/false); |
360 | if (err) | 360 | if (err) |
361 | PE_ABORT(err); | 361 | PE_ABORT(err); |
362 | $$ = list; | 362 | $$ = list; |
363 | -- | 363 | -- |
364 | 2.47.1.613.gc27f4b7a9f-goog | 364 | 2.49.0.395.g12beb8f557-goog | diff view generated by jsdifflib |