...
...
19
to a more consistent cpu_cycles and avoided the problem. With these
19
to a more consistent cpu_cycles and avoided the problem. With these
20
changes the problematic event will now be skipped, a large warning
20
changes the problematic event will now be skipped, a large warning
21
produced, and perf record will continue for the other PMU events. This
21
produced, and perf record will continue for the other PMU events. This
22
solution was proposed by Arnaldo.
22
solution was proposed by Arnaldo.
23
23
24
Two minor changes have been added to help with the error message and
24
v6: Rebase of v5 (dropping already merged patches):
25
to work around issues occurring with "perf stat metrics (shadow stat)
25
https://lore.kernel.org/lkml/20250109222109.567031-1-irogers@google.com/
26
test".
26
that unusually had an RFC posted for it:
27
https://lore.kernel.org/lkml/Z7Z5kv75BMML2A1q@google.com/
28
Note, this patch conflicts/contradicts:
29
https://lore.kernel.org/lkml/20250312211623.2495798-1-irogers@google.com/
30
that I posted so that we could either consistently prioritize
31
sysfs/json (these patches) or legacy events (the other
32
patches). That lack of event printing and encoding inconsistency
33
is most prominent in the encoding of events like "instructions"
34
which on hybrid are reported as "cpu_core/instructions/" but
35
"instructions" before these patches gets a legacy encoding while
36
"cpu_core/instructions/" gets a sysfs/json encoding. These patches
37
make "instructions" always get a sysfs/json encoding while the
38
alternate patches make it always get a legacy encoding.
27
39
28
The patches have only been tested on my x86 non-hybrid laptop.
40
v5: Follow Namhyung's suggestion and ignore the case where command
41
line dummy events fail to open alongside other events that all
42
fail to open. Note, the Tested-by tags are left on the series as
43
v4 and v5 were changing an error case that doesn't occur in
44
testing but was manually tested by myself.
29
45
30
Ian Rogers (4):
46
v4: Rework the no events opening change from v3 to make it handle
31
perf evsel: Add pmu_name helper
47
multiple dummy events. Sadly an evlist isn't empty if it just
32
perf stat: Fix find_stat for mixed legacy/non-legacy events
48
contains dummy events as the dummy event may be used with "perf
49
record -e dummy .." as a way to determine whether permission
50
issues exist. Other software events like cpu-clock would suffice
51
for this, but the using dummy genie has left the bottle.
52
53
Another problem is that we appear to have an excessive number of
54
dummy events added, for example, we can likely avoid a dummy event
55
and add sideband data to the original event. For auxtrace more
56
dummy events may be opened too. Anyway, this has led to the
57
approach taken in patch 3 where the number of dummy parsed events
58
is computed. If the number of removed/failing-to-open non-dummy
59
events matches the number of non-dummy events then we want to
60
fail, but only if there are no parsed dummy events or if there was
61
one then it must have opened. The math here is hard to read, but
62
passes my manual testing.
63
64
v3: Make no events opening for perf record a failure as suggested by
65
James Clark and Aditya Bodkhe <Aditya.Bodkhe1@ibm.com>. Also,
66
rebase.
67
68
v2: Rebase and add tested-by tags from James Clark, Leo Yan and Atish
69
Patra who have tested on RISC-V and ARM CPUs, including the
70
problem case from before.
71
72
Ian Rogers (2):
33
perf record: Skip don't fail for events that don't open
73
perf record: Skip don't fail for events that don't open
34
perf parse-events: Reapply "Prefer sysfs/JSON hardware events over
74
perf parse-events: Reapply "Prefer sysfs/JSON hardware events over
35
legacy"
75
legacy"
36
76
37
tools/perf/builtin-record.c | 22 +++++++---
77
tools/perf/builtin-record.c | 47 ++++++++++++++++++---
38
tools/perf/util/evsel.c | 10 +++++
39
tools/perf/util/evsel.h | 1 +
40
tools/perf/util/parse-events.c | 26 +++++++++---
78
tools/perf/util/parse-events.c | 26 +++++++++---
41
tools/perf/util/parse-events.l | 76 +++++++++++++++++-----------------
79
tools/perf/util/parse-events.l | 76 +++++++++++++++++-----------------
42
tools/perf/util/parse-events.y | 60 ++++++++++++++++++---------
80
tools/perf/util/parse-events.y | 60 ++++++++++++++++++---------
43
tools/perf/util/pmus.c | 20 +++++++--
81
4 files changed, 139 insertions(+), 70 deletions(-)
44
tools/perf/util/stat-shadow.c | 3 +-
45
8 files changed, 145 insertions(+), 73 deletions(-)
46
82
47
--
83
--
48
2.47.0.163.g1226f6d8fa-goog
84
2.49.0.395.g12beb8f557-goog
diff view generated by jsdifflib
Deleted patch
1
Add helper to get the name of the evsel's PMU. This handles the case
2
where there's no sysfs PMU via parse_events event_type helper.
3
1
4
Signed-off-by: Ian Rogers <irogers@google.com>
5
---
6
tools/perf/util/evsel.c | 10 ++++++++++
7
tools/perf/util/evsel.h | 1 +
8
2 files changed, 11 insertions(+)
9
10
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tools/perf/util/evsel.c
13
+++ b/tools/perf/util/evsel.c
14
@@ -XXX,XX +XXX,XX @@ int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel),
15
    return 0;
16
}
17
18
+const char *evsel__pmu_name(const struct evsel *evsel)
19
+{
20
+    struct perf_pmu *pmu = evsel__find_pmu(evsel);
21
+
22
+    if (pmu)
23
+        return pmu->name;
24
+
25
+    return event_type(evsel->core.attr.type);
26
+}
27
+
28
#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
29
30
int __evsel__sample_size(u64 sample_type)
31
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tools/perf/util/evsel.h
34
+++ b/tools/perf/util/evsel.h
35
@@ -XXX,XX +XXX,XX @@ int evsel__object_config(size_t object_size,
36
             void (*fini)(struct evsel *evsel));
37
38
struct perf_pmu *evsel__find_pmu(const struct evsel *evsel);
39
+const char *evsel__pmu_name(const struct evsel *evsel);
40
bool evsel__is_aux_event(const struct evsel *evsel);
41
42
struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx);
43
--
44
2.47.0.163.g1226f6d8fa-goog
diff view generated by jsdifflib
Deleted patch
1
Legacy events typically don't have a PMU when added leading to
2
mismatched legacy/non-legacy cases in find_stat. Use evsel__find_pmu
3
to make sure the evsel PMU is looked up. Update the evsel__find_pmu
4
code to look for the PMU using the extended config type or, for legacy
5
hardware/hw_cache events on non-hybrid systems, just use the core PMU.
6
1
7
Before:
8
```
9
$ perf stat -e cycles,cpu/instructions/ -a sleep 1
10
Performance counter stats for 'system wide':
11
12
215,309,764 cycles
13
44,326,491 cpu/instructions/
14
15
1.002555314 seconds time elapsed
16
```
17
After:
18
```
19
$ perf stat -e cycles,cpu/instructions/ -a sleep 1
20
21
Performance counter stats for 'system wide':
22
23
990,676,332 cycles
24
1,235,762,487 cpu/instructions/ # 1.25 insn per cycle
25
26
1.002667198 seconds time elapsed
27
```
28
29
Fixes: 3612ca8e2935 ("perf stat: Fix the hard-coded metrics
30
calculation on the hybrid")
31
Signed-off-by: Ian Rogers <irogers@google.com>
32
---
33
tools/perf/util/pmus.c | 20 +++++++++++++++++---
34
tools/perf/util/stat-shadow.c | 3 ++-
35
2 files changed, 19 insertions(+), 4 deletions(-)
36
37
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/tools/perf/util/pmus.c
40
+++ b/tools/perf/util/pmus.c
41
@@ -XXX,XX +XXX,XX @@ char *perf_pmus__default_pmu_name(void)
42
struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
43
{
44
    struct perf_pmu *pmu = evsel->pmu;
45
+    bool legacy_core_type;
46
47
-    if (!pmu) {
48
-        pmu = perf_pmus__find_by_type(evsel->core.attr.type);
49
-        ((struct evsel *)evsel)->pmu = pmu;
50
+    if (pmu)
51
+        return pmu;
52
+
53
+    pmu = perf_pmus__find_by_type(evsel->core.attr.type);
54
+    legacy_core_type =
55
+        evsel->core.attr.type == PERF_TYPE_HARDWARE ||
56
+        evsel->core.attr.type == PERF_TYPE_HW_CACHE;
57
+    if (!pmu && legacy_core_type) {
58
+        if (perf_pmus__supports_extended_type()) {
59
+            u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
60
+
61
+            pmu = perf_pmus__find_by_type(type);
62
+        } else {
63
+            pmu = perf_pmus__find_core_pmu();
64
+        }
65
    }
66
+    ((struct evsel *)evsel)->pmu = pmu;
67
    return pmu;
68
}
69
70
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/tools/perf/util/stat-shadow.c
73
+++ b/tools/perf/util/stat-shadow.c
74
@@ -XXX,XX +XXX,XX @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type
75
{
76
    struct evsel *cur;
77
    int evsel_ctx = evsel_context(evsel);
78
+    struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel);
79
80
    evlist__for_each_entry(evsel->evlist, cur) {
81
        struct perf_stat_aggr *aggr;
82
@@ -XXX,XX +XXX,XX @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type
83
         * Except the SW CLOCK events,
84
         * ignore if not the PMU we're looking for.
85
         */
86
-        if ((type != STAT_NSECS) && (evsel->pmu != cur->pmu))
87
+        if ((type != STAT_NSECS) && (evsel_pmu != evsel__find_pmu(cur)))
88
            continue;
89
90
        aggr = &cur->stats->aggr[aggr_idx];
91
--
92
2.47.0.163.g1226f6d8fa-goog
diff view generated by jsdifflib
...
...
50
FINISHED_INIT events: 1 ( 0.0%)
50
FINISHED_INIT events: 1 ( 0.0%)
51
cycles stats:
51
cycles stats:
52
SAMPLE events: 87
52
SAMPLE events: 87
53
```
53
```
54
54
55
Note, if all events fail to open then the data file will contain no
55
If all events fail to open then the perf record will fail:
56
samples. This is deliberate as at the point the events are opened
56
```
57
there are other events, such as the dummy event for sideband data, and
57
$ perf record -e LLC-prefetch-read true
58
these events will succeed in opening even if the user specified ones
58
Error:
59
don't. Having a mix of open and broken events leads to a problem of
59
Failure to open event 'LLC-prefetch-read' on PMU 'cpu' which will be removed.
60
identifying different sources of events.
60
The LLC-prefetch-read event is not supported.
61
Error:
62
Failure to open any events for recording
63
```
64
65
As an evlist may have dummy events that open when all command line
66
events fail we ignore dummy events when detecting if at least some
67
events open. This still permits the dummy event on its own to be used
68
as a permission check:
69
```
70
$ perf record -e dummy true
71
[ perf record: Woken up 1 times to write data ]
72
[ perf record: Captured and wrote 0.046 MB perf.data ]
73
```
74
but allows failure when a dummy event is implicilty inserted or when
75
there are insufficient permissions to open it:
76
```
77
$ perf record -e LLC-prefetch-read -a true
78
Error:
79
Failure to open event 'LLC-prefetch-read' on PMU 'cpu' which will be removed.
80
The LLC-prefetch-read event is not supported.
81
Error:
82
Failure to open any events for recording
83
```
61
84
62
The issue with legacy events is that on RISC-V they want the driver to
85
The issue with legacy events is that on RISC-V they want the driver to
63
not have mappings from legacy to non-legacy config encodings for each
86
not have mappings from legacy to non-legacy config encodings for each
64
vendor/model due to size, complexity and difficulty to update. It was
87
vendor/model due to size, complexity and difficulty to update. It was
65
reported that on ARM Apple-M? CPUs the legacy mapping in the driver
88
reported that on ARM Apple-M? CPUs the legacy mapping in the driver
...
...
81
messages like those above because of the uncore PMU advertising legacy
104
messages like those above because of the uncore PMU advertising legacy
82
event names.
105
event names.
83
106
84
Suggested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
107
Suggested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
85
Signed-off-by: Ian Rogers <irogers@google.com>
108
Signed-off-by: Ian Rogers <irogers@google.com>
109
Tested-by: James Clark <james.clark@linaro.org>
110
Tested-by: Leo Yan <leo.yan@arm.com>
111
Tested-by: Atish Patra <atishp@rivosinc.com>
86
---
112
---
87
tools/perf/builtin-record.c | 22 +++++++++++++++++-----
113
tools/perf/builtin-record.c | 47 ++++++++++++++++++++++++++++++++-----
88
1 file changed, 17 insertions(+), 5 deletions(-)
114
1 file changed, 41 insertions(+), 6 deletions(-)
89
115
90
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
116
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
91
index XXXXXXX..XXXXXXX 100644
117
index XXXXXXX..XXXXXXX 100644
92
--- a/tools/perf/builtin-record.c
118
--- a/tools/perf/builtin-record.c
93
+++ b/tools/perf/builtin-record.c
119
+++ b/tools/perf/builtin-record.c
120
@@ -XXX,XX +XXX,XX @@ static int record__config_tracking_events(struct record *rec)
121
     */
122
    if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
123
     perf_pmus__num_core_pmus() > 1) {
124
-
125
        /*
126
         * User space tasks can migrate between CPUs, so when tracing
127
         * selected CPUs, sideband for all CPUs is still needed.
94
@@ -XXX,XX +XXX,XX @@ static int record__open(struct record *rec)
128
@@ -XXX,XX +XXX,XX @@ static int record__open(struct record *rec)
95
    struct perf_session *session = rec->session;
129
    struct perf_session *session = rec->session;
96
    struct record_opts *opts = &rec->opts;
130
    struct record_opts *opts = &rec->opts;
97
    int rc = 0;
131
    int rc = 0;
98
+    bool skipped = false;
132
+    bool skipped = false;
...
...
119
    }
153
    }
120
154
121
+    if (skipped) {
155
+    if (skipped) {
122
+        struct evsel *tmp;
156
+        struct evsel *tmp;
123
+        int idx = 0;
157
+        int idx = 0;
158
+        bool evlist_empty = true;
124
+
159
+
160
+        /* Remove evsels that failed to open and update indices. */
125
+        evlist__for_each_entry_safe(evlist, tmp, pos) {
161
+        evlist__for_each_entry_safe(evlist, tmp, pos) {
126
+            if (pos->skippable)
162
+            if (pos->skippable) {
127
+                evlist__remove(evlist, pos);
163
+                evlist__remove(evlist, pos);
164
+                continue;
165
+            }
166
+
167
+            /*
168
+             * Note, dummy events may be command line parsed or
169
+             * added by the tool. We care about supporting `perf
170
+             * record -e dummy` which may be used as a permission
171
+             * check. Dummy events that are added to the command
172
+             * line and opened along with other events that fail,
173
+             * will still fail as if the dummy events were tool
174
+             * added events for the sake of code simplicity.
175
+             */
176
+            if (!evsel__is_dummy_event(pos))
177
+                evlist_empty = false;
178
+        }
179
+        evlist__for_each_entry(evlist, pos) {
128
+            pos->core.idx = idx++;
180
+            pos->core.idx = idx++;
181
+        }
182
+        /* If list is empty then fail. */
183
+        if (evlist_empty) {
184
+            ui__error("Failure to open any events for recording.\n");
185
+            rc = -1;
186
+            goto out;
129
+        }
187
+        }
130
+    }
188
+    }
131
    if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
189
    if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
132
        pr_warning(
190
        pr_warning(
133
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
191
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
134
--
192
--
135
2.47.0.163.g1226f6d8fa-goog
193
2.49.0.395.g12beb8f557-goog
diff view generated by jsdifflib
...
...
21
related code simplified.
21
related code simplified.
22
22
23
Signed-off-by: Ian Rogers <irogers@google.com>
23
Signed-off-by: Ian Rogers <irogers@google.com>
24
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
24
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
25
Tested-by: Atish Patra <atishp@rivosinc.com>
25
Tested-by: Atish Patra <atishp@rivosinc.com>
26
Tested-by: James Clark <james.clark@linaro.org>
27
Tested-by: Leo Yan <leo.yan@arm.com>
26
Cc: Adrian Hunter <adrian.hunter@intel.com>
28
Cc: Adrian Hunter <adrian.hunter@intel.com>
27
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
29
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
28
Cc: Beeman Strong <beeman@rivosinc.com>
30
Cc: Beeman Strong <beeman@rivosinc.com>
29
Cc: Ingo Molnar <mingo@redhat.com>
31
Cc: Ingo Molnar <mingo@redhat.com>
30
Cc: James Clark <james.clark@arm.com>
31
Cc: Jiri Olsa <jolsa@kernel.org>
32
Cc: Jiri Olsa <jolsa@kernel.org>
32
Cc: Mark Rutland <mark.rutland@arm.com>
33
Cc: Mark Rutland <mark.rutland@arm.com>
33
Cc: Namhyung Kim <namhyung@kernel.org>
34
Cc: Namhyung Kim <namhyung@kernel.org>
34
Cc: Peter Zijlstra <peterz@infradead.org>
35
Cc: Peter Zijlstra <peterz@infradead.org>
35
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
36
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
...
...
137
+    yylval->hardware_event.str = strdup(text);
138
+    yylval->hardware_event.str = strdup(text);
138
+    yylval->hardware_event.num = config;
139
+    yylval->hardware_event.num = config;
139
    return PE_TERM_HW;
140
    return PE_TERM_HW;
140
}
141
}
141
142
142
@@ -XXX,XX +XXX,XX @@ percore            { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
143
@@ -XXX,XX +XXX,XX @@ aux-output        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
143
aux-output        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
144
aux-action        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); }
144
aux-sample-size        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
145
aux-sample-size        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
145
metric-id        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
146
metric-id        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
146
-cpu-cycles|cycles                { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
147
-cpu-cycles|cycles                { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
147
-stalled-cycles-frontend|idle-cycles-frontend    { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
148
-stalled-cycles-frontend|idle-cycles-frontend    { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
148
-stalled-cycles-backend|idle-cycles-backend    { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
149
-stalled-cycles-backend|idle-cycles-backend    { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
...
...
358
+                /*head_config=*/NULL, /*wildcard=*/false);
359
+                /*head_config=*/NULL, /*wildcard=*/false);
359
    if (err)
360
    if (err)
360
        PE_ABORT(err);
361
        PE_ABORT(err);
361
    $$ = list;
362
    $$ = list;
362
--
363
--
363
2.47.0.163.g1226f6d8fa-goog
364
2.49.0.395.g12beb8f557-goog
diff view generated by jsdifflib