strcmp_cpuid_str performs regular expression comparisons. Avoid
repeated computation of the table by remembering the table in a
static.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/jevents.py | 48 +++++++++++++++++++-------------
1 file changed, 28 insertions(+), 20 deletions(-)
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index fd009752b427..8d8d5088c53c 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -978,28 +978,32 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
{
- const struct pmu_events_table *table = NULL;
- char *cpuid = perf_pmu__getcpuid(pmu);
+ static const struct pmu_events_table *table;
size_t i;
- /* on some platforms which uses cpus map, cpuid can be NULL for
- * PMUs other than CORE PMUs.
- */
- if (!cpuid)
- return NULL;
-
- i = 0;
- for (;;) {
- const struct pmu_events_map *map = &pmu_events_map[i++];
- if (!map->arch)
- break;
-
- if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
- table = &map->event_table;
- break;
+ if (!table) {
+ char *cpuid = perf_pmu__getcpuid(pmu);
+
+ /*
+ * On some platforms which uses cpus map, cpuid can be NULL for
+ * PMUs other than CORE PMUs.
+ */
+ if (!cpuid)
+ return NULL;
+
+ i = 0;
+ for (;;) {
+ const struct pmu_events_map *map = &pmu_events_map[i++];
+ if (!map->arch)
+ break;
+
+ if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
+ table = &map->event_table;
+ break;
+ }
}
+ free(cpuid);
}
- free(cpuid);
if (!pmu)
return table;
@@ -1015,13 +1019,17 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
{
- const struct pmu_metrics_table *table = NULL;
- char *cpuid = perf_pmu__getcpuid(pmu);
+ static const struct pmu_metrics_table *table;
+ char *cpuid;
int i;
+ if (table)
+ return table;
+
/* on some platforms which uses cpus map, cpuid can be NULL for
* PMUs other than CORE PMUs.
*/
+ cpuid = perf_pmu__getcpuid(pmu);
if (!cpuid)
return NULL;
--
2.42.0.609.gbb76f46606-goog
On 7/10/23 05:13, Ian Rogers wrote:
> strcmp_cpuid_str performs regular expression comparisons. Avoid
> repeated computation of the table by remembering the table in a
> static.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/pmu-events/jevents.py | 48 +++++++++++++++++++-------------
> 1 file changed, 28 insertions(+), 20 deletions(-)
>
> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> index fd009752b427..8d8d5088c53c 100755
> --- a/tools/perf/pmu-events/jevents.py
> +++ b/tools/perf/pmu-events/jevents.py
> @@ -978,28 +978,32 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
>
> const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
> {
> - const struct pmu_events_table *table = NULL;
> - char *cpuid = perf_pmu__getcpuid(pmu);
> + static const struct pmu_events_table *table;
> size_t i;
>
> - /* on some platforms which uses cpus map, cpuid can be NULL for
> - * PMUs other than CORE PMUs.
> - */
> - if (!cpuid)
> - return NULL;
> -
> - i = 0;
> - for (;;) {
> - const struct pmu_events_map *map = &pmu_events_map[i++];
> - if (!map->arch)
> - break;
> -
> - if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
> - table = &map->event_table;
> - break;
> + if (!table) {
> + char *cpuid = perf_pmu__getcpuid(pmu);
Seems to assume the function is never called with a pmu
that would give a different result for perf_pmu__getcpuid(pmu)
> +
> + /*
> + * On some platforms which uses cpus map, cpuid can be NULL for
> + * PMUs other than CORE PMUs.
> + */
> + if (!cpuid)
> + return NULL;
> +
> + i = 0;
> + for (;;) {
> + const struct pmu_events_map *map = &pmu_events_map[i++];
> + if (!map->arch)
> + break;
> +
> + if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
> + table = &map->event_table;
> + break;
> + }
> }
> + free(cpuid);
> }
> - free(cpuid);
> if (!pmu)
> return table;
>
> @@ -1015,13 +1019,17 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
>
> const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
> {
> - const struct pmu_metrics_table *table = NULL;
> - char *cpuid = perf_pmu__getcpuid(pmu);
> + static const struct pmu_metrics_table *table;
> + char *cpuid;
> int i;
>
> + if (table)
> + return table;
Ditto
> +
> /* on some platforms which uses cpus map, cpuid can be NULL for
> * PMUs other than CORE PMUs.
> */
> + cpuid = perf_pmu__getcpuid(pmu);
> if (!cpuid)
> return NULL;
>
Hello,
On 2023/10/7 10:13, Ian Rogers wrote:
> strcmp_cpuid_str performs regular expression comparisons. Avoid
> repeated computation of the table by remembering the table in a
> static.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/pmu-events/jevents.py | 48 +++++++++++++++++++-------------
> 1 file changed, 28 insertions(+), 20 deletions(-)
>
> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> index fd009752b427..8d8d5088c53c 100755
> --- a/tools/perf/pmu-events/jevents.py
> +++ b/tools/perf/pmu-events/jevents.py
> @@ -978,28 +978,32 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
>
> const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
> {
> - const struct pmu_events_table *table = NULL;
> - char *cpuid = perf_pmu__getcpuid(pmu);
> + static const struct pmu_events_table *table;
> size_t i;
>
> - /* on some platforms which uses cpus map, cpuid can be NULL for
> - * PMUs other than CORE PMUs.
> - */
> - if (!cpuid)
> - return NULL;
> -
> - i = 0;
> - for (;;) {
> - const struct pmu_events_map *map = &pmu_events_map[i++];
> - if (!map->arch)
> - break;
> -
> - if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
> - table = &map->event_table;
> - break;
> + if (!table) {
If there is no matched table in pmu_events_map,
perf_pmu__find_events_table() will enter this branch for repeated search
each time.
Or do we need to use another variable to indicate whether the search has
been performed?
Thanks,
Yang
On Sat, Oct 7, 2023 at 8:39 PM Yang Jihong <yangjihong1@huawei.com> wrote:
>
> Hello,
>
> On 2023/10/7 10:13, Ian Rogers wrote:
> > strcmp_cpuid_str performs regular expression comparisons. Avoid
> > repeated computation of the table by remembering the table in a
> > static.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > tools/perf/pmu-events/jevents.py | 48 +++++++++++++++++++-------------
> > 1 file changed, 28 insertions(+), 20 deletions(-)
> >
> > diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> > index fd009752b427..8d8d5088c53c 100755
> > --- a/tools/perf/pmu-events/jevents.py
> > +++ b/tools/perf/pmu-events/jevents.py
> > @@ -978,28 +978,32 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
> >
> > const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
> > {
> > - const struct pmu_events_table *table = NULL;
> > - char *cpuid = perf_pmu__getcpuid(pmu);
> > + static const struct pmu_events_table *table;
> > size_t i;
> >
> > - /* on some platforms which uses cpus map, cpuid can be NULL for
> > - * PMUs other than CORE PMUs.
> > - */
> > - if (!cpuid)
> > - return NULL;
> > -
> > - i = 0;
> > - for (;;) {
> > - const struct pmu_events_map *map = &pmu_events_map[i++];
> > - if (!map->arch)
> > - break;
> > -
> > - if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
> > - table = &map->event_table;
> > - break;
> > + if (!table) {
> If there is no matched table in pmu_events_map,
> perf_pmu__find_events_table() will enter this branch for repeated search
> each time.
> Or do we need to use another variable to indicate whether the search has
> been performed?
Agreed, the behavior will match the existing behavior. Longer term I
want to remove this code. Do you have a scenario we should optimize
for here?
Thanks,
Ian
> Thanks,
> Yang
Hello,
On 2023/10/8 13:49, Ian Rogers wrote:
> On Sat, Oct 7, 2023 at 8:39 PM Yang Jihong <yangjihong1@huawei.com> wrote:
>>
>> Hello,
>>
>> On 2023/10/7 10:13, Ian Rogers wrote:
>>> strcmp_cpuid_str performs regular expression comparisons. Avoid
>>> repeated computation of the table by remembering the table in a
>>> static.
>>>
>>> Signed-off-by: Ian Rogers <irogers@google.com>
>>> ---
>>> tools/perf/pmu-events/jevents.py | 48 +++++++++++++++++++-------------
>>> 1 file changed, 28 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
>>> index fd009752b427..8d8d5088c53c 100755
>>> --- a/tools/perf/pmu-events/jevents.py
>>> +++ b/tools/perf/pmu-events/jevents.py
>>> @@ -978,28 +978,32 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
>>>
>>> const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
>>> {
>>> - const struct pmu_events_table *table = NULL;
>>> - char *cpuid = perf_pmu__getcpuid(pmu);
>>> + static const struct pmu_events_table *table;
>>> size_t i;
>>>
>>> - /* on some platforms which uses cpus map, cpuid can be NULL for
>>> - * PMUs other than CORE PMUs.
>>> - */
>>> - if (!cpuid)
>>> - return NULL;
>>> -
>>> - i = 0;
>>> - for (;;) {
>>> - const struct pmu_events_map *map = &pmu_events_map[i++];
>>> - if (!map->arch)
>>> - break;
>>> -
>>> - if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
>>> - table = &map->event_table;
>>> - break;
>>> + if (!table) {
>> If there is no matched table in pmu_events_map,
>> perf_pmu__find_events_table() will enter this branch for repeated search
>> each time.
>> Or do we need to use another variable to indicate whether the search has
>> been performed?
>
> Agreed, the behavior will match the existing behavior. Longer term I
> want to remove this code. Do you have a scenario we should optimize
> for here?
>
Yes, the CPU of the environment I'm using is "AuthenticAMD-15-6B-1" (not
in the pmu_events_map).
As a result, the search is repeated every time.
(If `perf record true` is executed once, the search is repeated for 6
times.)
This commit avoids repeated lookups to improve performance,
so if it's feasible, is it best to consider improving performance in
this case as well?
Thanks,
Yang
© 2016 - 2026 Red Hat, Inc.