[PATCH v4 06/22] perf jevents: Add tsx metric group for Intel models

Ian Rogers posted 22 patches 2 months ago
[PATCH v4 06/22] perf jevents: Add tsx metric group for Intel models
Posted by Ian Rogers 2 months ago
Allow duplicated metrics to be dropped from json files. Detect when TSX
is supported by a model by using the json events, but use sysfs events at
runtime as hypervisors, etc. may disable TSX.

Add CheckPmu to metric to determine which PMUs have been associated
with the loaded events.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/pmu-events/intel_metrics.py | 52 +++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/tools/perf/pmu-events/intel_metrics.py b/tools/perf/pmu-events/intel_metrics.py
index f34b4230a4ee..58e243695f0a 100755
--- a/tools/perf/pmu-events/intel_metrics.py
+++ b/tools/perf/pmu-events/intel_metrics.py
@@ -1,12 +1,13 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-from metric import (d_ratio, has_event, max, Event, JsonEncodeMetric,
+from metric import (d_ratio, has_event, max, CheckPmu, Event, JsonEncodeMetric,
                     JsonEncodeMetricGroupDescriptions, LoadEvents, Metric,
                     MetricGroup, MetricRef, Select)
 import argparse
 import json
 import math
 import os
+from typing import Optional
 
 # Global command line arguments.
 _args = None
@@ -74,6 +75,54 @@ def Smi() -> MetricGroup:
     ], description = 'System Management Interrupt metrics')
 
 
+def Tsx() -> Optional[MetricGroup]:
+  pmu = "cpu_core" if CheckPmu("cpu_core") else "cpu"
+  cycles = Event('cycles')
+  cycles_in_tx = Event(f'{pmu}/cycles\-t/')
+  cycles_in_tx_cp = Event(f'{pmu}/cycles\-ct/')
+  try:
+    # Test if the tsx event is present in the json, prefer the
+    # sysfs version so that we can detect its presence at runtime.
+    transaction_start = Event("RTM_RETIRED.START")
+    transaction_start = Event(f'{pmu}/tx\-start/')
+  except:
+    return None
+
+  elision_start = None
+  try:
+    # Elision start isn't supported by all models, but we'll not
+    # generate the tsx_cycles_per_elision metric in that
+    # case. Again, prefer the sysfs encoding of the event.
+    elision_start = Event("HLE_RETIRED.START")
+    elision_start = Event(f'{pmu}/el\-start/')
+  except:
+    pass
+
+  return MetricGroup('transaction', [
+      Metric('tsx_transactional_cycles',
+             'Percentage of cycles within a transaction region.',
+             Select(cycles_in_tx / cycles, has_event(cycles_in_tx), 0),
+             '100%'),
+      Metric('tsx_aborted_cycles', 'Percentage of cycles in aborted transactions.',
+             Select(max(cycles_in_tx - cycles_in_tx_cp, 0) / cycles,
+                    has_event(cycles_in_tx),
+                    0),
+             '100%'),
+      Metric('tsx_cycles_per_transaction',
+             'Number of cycles within a transaction divided by the number of transactions.',
+             Select(cycles_in_tx / transaction_start,
+                    has_event(cycles_in_tx),
+                    0),
+             "cycles / transaction"),
+      Metric('tsx_cycles_per_elision',
+             'Number of cycles within a transaction divided by the number of elisions.',
+             Select(cycles_in_tx / elision_start,
+                    has_event(elision_start),
+                    0),
+             "cycles / elision") if elision_start else None,
+  ], description="Breakdown of transactional memory statistics")
+
+
 def main() -> None:
   global _args
 
@@ -100,6 +149,7 @@ def main() -> None:
       Idle(),
       Rapl(),
       Smi(),
+      Tsx(),
   ])
 
 
-- 
2.46.1.824.gd892dcdcdd-goog
Re: [PATCH v4 06/22] perf jevents: Add tsx metric group for Intel models
Posted by Liang, Kan 3 weeks, 1 day ago

On 2024-09-26 1:50 p.m., Ian Rogers wrote:
> Allow duplicated metric to be dropped from json files. Detect when TSX
> is supported by a model by using the json events, use sysfs events at
> runtime as hypervisors, etc. may disable TSX.
> 
> Add CheckPmu to metric to determine if which PMUs have been associated
> with the loaded events.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/pmu-events/intel_metrics.py | 52 +++++++++++++++++++++++++-
>  1 file changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/pmu-events/intel_metrics.py b/tools/perf/pmu-events/intel_metrics.py
> index f34b4230a4ee..58e243695f0a 100755
> --- a/tools/perf/pmu-events/intel_metrics.py
> +++ b/tools/perf/pmu-events/intel_metrics.py
> @@ -1,12 +1,13 @@
>  #!/usr/bin/env python3
>  # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> -from metric import (d_ratio, has_event, max, Event, JsonEncodeMetric,
> +from metric import (d_ratio, has_event, max, CheckPmu, Event, JsonEncodeMetric,
>                      JsonEncodeMetricGroupDescriptions, LoadEvents, Metric,
>                      MetricGroup, MetricRef, Select)
>  import argparse
>  import json
>  import math
>  import os
> +from typing import Optional
>  
>  # Global command line arguments.
>  _args = None
> @@ -74,6 +75,54 @@ def Smi() -> MetricGroup:
>      ], description = 'System Management Interrupt metrics')
>  
>  
> +def Tsx() -> Optional[MetricGroup]:
> +  pmu = "cpu_core" if CheckPmu("cpu_core") else "cpu"
> +  cycles = Event('cycles')

Isn't the pmu prefix required for cycles as well?

> +  cycles_in_tx = Event(f'{pmu}/cycles\-t/')
> +  cycles_in_tx_cp = Event(f'{pmu}/cycles\-ct/')
> +  try:
> +    # Test if the tsx event is present in the json, prefer the
> +    # sysfs version so that we can detect its presence at runtime.
> +    transaction_start = Event("RTM_RETIRED.START")
> +    transaction_start = Event(f'{pmu}/tx\-start/')

What's the difference between this check and the later has_event() check?

All the tsx related events are model-specific events. We should check
them all before using them.

Thanks,
Kan
> +  except:
> +    return None
> +
> +  elision_start = None
> +  try:
> +    # Elision start isn't supported by all models, but we'll not
> +    # generate the tsx_cycles_per_elision metric in that
> +    # case. Again, prefer the sysfs encoding of the event.
> +    elision_start = Event("HLE_RETIRED.START")
> +    elision_start = Event(f'{pmu}/el\-start/')
> +  except:
> +    pass
> +
> +  return MetricGroup('transaction', [
> +      Metric('tsx_transactional_cycles',
> +             'Percentage of cycles within a transaction region.',
> +             Select(cycles_in_tx / cycles, has_event(cycles_in_tx), 0),
> +             '100%'),
> +      Metric('tsx_aborted_cycles', 'Percentage of cycles in aborted transactions.',
> +             Select(max(cycles_in_tx - cycles_in_tx_cp, 0) / cycles,
> +                    has_event(cycles_in_tx),
> +                    0),
> +             '100%'),
> +      Metric('tsx_cycles_per_transaction',
> +             'Number of cycles within a transaction divided by the number of transactions.',
> +             Select(cycles_in_tx / transaction_start,
> +                    has_event(cycles_in_tx),
> +                    0),
> +             "cycles / transaction"),
> +      Metric('tsx_cycles_per_elision',
> +             'Number of cycles within a transaction divided by the number of elisions.',
> +             Select(cycles_in_tx / elision_start,
> +                    has_event(elision_start),
> +                    0),
> +             "cycles / elision") if elision_start else None,
> +  ], description="Breakdown of transactional memory statistics")
> +
> +
>  def main() -> None:
>    global _args
>  
> @@ -100,6 +149,7 @@ def main() -> None:
>        Idle(),
>        Rapl(),
>        Smi(),
> +      Tsx(),
>    ])
>  
>
Re: [PATCH v4 06/22] perf jevents: Add tsx metric group for Intel models
Posted by Ian Rogers 3 weeks, 1 day ago
On Wed, Nov 6, 2024 at 9:53 AM Liang, Kan <kan.liang@linux.intel.com> wrote:
>
>
>
> On 2024-09-26 1:50 p.m., Ian Rogers wrote:
> > Allow duplicated metric to be dropped from json files. Detect when TSX
> > is supported by a model by using the json events, use sysfs events at
> > runtime as hypervisors, etc. may disable TSX.
> >
> > Add CheckPmu to metric to determine if which PMUs have been associated
> > with the loaded events.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/pmu-events/intel_metrics.py | 52 +++++++++++++++++++++++++-
> >  1 file changed, 51 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/perf/pmu-events/intel_metrics.py b/tools/perf/pmu-events/intel_metrics.py
> > index f34b4230a4ee..58e243695f0a 100755
> > --- a/tools/perf/pmu-events/intel_metrics.py
> > +++ b/tools/perf/pmu-events/intel_metrics.py
> > @@ -1,12 +1,13 @@
> >  #!/usr/bin/env python3
> >  # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> > -from metric import (d_ratio, has_event, max, Event, JsonEncodeMetric,
> > +from metric import (d_ratio, has_event, max, CheckPmu, Event, JsonEncodeMetric,
> >                      JsonEncodeMetricGroupDescriptions, LoadEvents, Metric,
> >                      MetricGroup, MetricRef, Select)
> >  import argparse
> >  import json
> >  import math
> >  import os
> > +from typing import Optional
> >
> >  # Global command line arguments.
> >  _args = None
> > @@ -74,6 +75,54 @@ def Smi() -> MetricGroup:
> >      ], description = 'System Management Interrupt metrics')
> >
> >
> > +def Tsx() -> Optional[MetricGroup]:
> > +  pmu = "cpu_core" if CheckPmu("cpu_core") else "cpu"
> > +  cycles = Event('cycles')
>
> Isn't the pmu prefix required for cycles as well?

Makes sense.

> > +  cycles_in_tx = Event(f'{pmu}/cycles\-t/')
> > +  cycles_in_tx_cp = Event(f'{pmu}/cycles\-ct/')
> > +  try:
> > +    # Test if the tsx event is present in the json, prefer the
> > +    # sysfs version so that we can detect its presence at runtime.
> > +    transaction_start = Event("RTM_RETIRED.START")
> > +    transaction_start = Event(f'{pmu}/tx\-start/')
>
> What's the difference between this check and the later has_event() check?
>
> All the tsx related events are model-specific events. We should check
> them all before using it.

So if there is a PMU in the Event name then the Event logic assumes you
are using sysfs and doesn't check that the event exists in json. As you
say, I needed a way to detect whether this model supports TSX. I wanted
to avoid a model lookup table, so I used the existence of
RTM_RETIRED.START for a model as the way to determine if the model
supports TSX. Once we know we have a model supporting TSX then we use
the sysfs event name and has_event check, so that if TSX and the
event have been disabled the metric doesn't fail parsing.

So, the first check is a compile time check of, "does this model have
TSX?". The "has_event" check is a runtime thing where we want to see
if the event exists in sysfs in case TSX was disabled, say, in the
BIOS.

Thanks,
Ian

>
> Thanks,
> Kan
> > +  except:
> > +    return None
> > +
> > +  elision_start = None
> > +  try:
> > +    # Elision start isn't supported by all models, but we'll not
> > +    # generate the tsx_cycles_per_elision metric in that
> > +    # case. Again, prefer the sysfs encoding of the event.
> > +    elision_start = Event("HLE_RETIRED.START")
> > +    elision_start = Event(f'{pmu}/el\-start/')
> > +  except:
> > +    pass
> > +
> > +  return MetricGroup('transaction', [
> > +      Metric('tsx_transactional_cycles',
> > +             'Percentage of cycles within a transaction region.',
> > +             Select(cycles_in_tx / cycles, has_event(cycles_in_tx), 0),
> > +             '100%'),
> > +      Metric('tsx_aborted_cycles', 'Percentage of cycles in aborted transactions.',
> > +             Select(max(cycles_in_tx - cycles_in_tx_cp, 0) / cycles,
> > +                    has_event(cycles_in_tx),
> > +                    0),
> > +             '100%'),
> > +      Metric('tsx_cycles_per_transaction',
> > +             'Number of cycles within a transaction divided by the number of transactions.',
> > +             Select(cycles_in_tx / transaction_start,
> > +                    has_event(cycles_in_tx),
> > +                    0),
> > +             "cycles / transaction"),
> > +      Metric('tsx_cycles_per_elision',
> > +             'Number of cycles within a transaction divided by the number of elisions.',
> > +             Select(cycles_in_tx / elision_start,
> > +                    has_event(elision_start),
> > +                    0),
> > +             "cycles / elision") if elision_start else None,
> > +  ], description="Breakdown of transactional memory statistics")
> > +
> > +
> >  def main() -> None:
> >    global _args
> >
> > @@ -100,6 +149,7 @@ def main() -> None:
> >        Idle(),
> >        Rapl(),
> >        Smi(),
> > +      Tsx(),
> >    ])
> >
> >
>
Re: [PATCH v4 06/22] perf jevents: Add tsx metric group for Intel models
Posted by Liang, Kan 3 weeks, 1 day ago

On 2024-11-06 1:15 p.m., Ian Rogers wrote:
> On Wed, Nov 6, 2024 at 9:53 AM Liang, Kan <kan.liang@linux.intel.com> wrote:
>>
>>
>>
>> On 2024-09-26 1:50 p.m., Ian Rogers wrote:
>>> Allow duplicated metric to be dropped from json files. Detect when TSX
>>> is supported by a model by using the json events, use sysfs events at
>>> runtime as hypervisors, etc. may disable TSX.
>>>
>>> Add CheckPmu to metric to determine if which PMUs have been associated
>>> with the loaded events.
>>>
>>> Signed-off-by: Ian Rogers <irogers@google.com>
>>> ---
>>>  tools/perf/pmu-events/intel_metrics.py | 52 +++++++++++++++++++++++++-
>>>  1 file changed, 51 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/tools/perf/pmu-events/intel_metrics.py b/tools/perf/pmu-events/intel_metrics.py
>>> index f34b4230a4ee..58e243695f0a 100755
>>> --- a/tools/perf/pmu-events/intel_metrics.py
>>> +++ b/tools/perf/pmu-events/intel_metrics.py
>>> @@ -1,12 +1,13 @@
>>>  #!/usr/bin/env python3
>>>  # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
>>> -from metric import (d_ratio, has_event, max, Event, JsonEncodeMetric,
>>> +from metric import (d_ratio, has_event, max, CheckPmu, Event, JsonEncodeMetric,
>>>                      JsonEncodeMetricGroupDescriptions, LoadEvents, Metric,
>>>                      MetricGroup, MetricRef, Select)
>>>  import argparse
>>>  import json
>>>  import math
>>>  import os
>>> +from typing import Optional
>>>
>>>  # Global command line arguments.
>>>  _args = None
>>> @@ -74,6 +75,54 @@ def Smi() -> MetricGroup:
>>>      ], description = 'System Management Interrupt metrics')
>>>
>>>
>>> +def Tsx() -> Optional[MetricGroup]:
>>> +  pmu = "cpu_core" if CheckPmu("cpu_core") else "cpu"
>>> +  cycles = Event('cycles')
>>
>> Isn't the pmu prefix required for cycles as well?
> 
> Makes sense.
> 
>>> +  cycles_in_tx = Event(f'{pmu}/cycles\-t/')
>>> +  cycles_in_tx_cp = Event(f'{pmu}/cycles\-ct/')
>>> +  try:
>>> +    # Test if the tsx event is present in the json, prefer the
>>> +    # sysfs version so that we can detect its presence at runtime.
>>> +    transaction_start = Event("RTM_RETIRED.START")
>>> +    transaction_start = Event(f'{pmu}/tx\-start/')
>>
>> What's the difference between this check and the later has_event() check?
>>
>> All the tsx related events are model-specific events. We should check
>> them all before using it.
> 
> So if there is PMU in the Event name then the Event logic assumes you
> are using sysfs and doesn't check the event exists in json. As you
> say, I needed a way to detect does this model support TSX? I wanted to
> avoid a model lookup table, so I used the existence of
> RTM_RETIRED.START for a model as the way to determine if the model
> supports TSX. Once we know we have a model supporting TSX then we use
> the sysfs event name and has_event check, so that if the TSX and the
> event have been disabled the metric doesn't fail parsing.
> 
> So, the first check is a compile time check of, "does this model have
> TSX?". The "has_event" check is a runtime thing where we want to see
> if the event exists in sysfs in case the TSX was disabled say in the
> BIOS.
> 

Yes, that's sufficient.
But the "has_event" check seems very random.

For example,
>>>> +      Metric('tsx_cycles_per_transaction',
>>>> +             'Number of cycles within a transaction divided by the number of transactions.',
>>>> +             Select(cycles_in_tx / transaction_start,
>>>> +                    has_event(cycles_in_tx),
>>>> +                    0),
>>>> +             "cycles / transaction"),

I think both cycles_in_tx and transaction_start should be checked.

>>>> +      Metric('tsx_cycles_per_elision',
>>>> +             'Number of cycles within a transaction divided by the number of elisions.',
>>>> +             Select(cycles_in_tx / elision_start,
>>>> +                    has_event(elision_start),
>>>> +                    0),

This one only checks the elision_start event.

Thanks,
Kan
> Thanks,
> Ian
> 
>>
>> Thanks,
>> Kan
>>> +  except:
>>> +    return None
>>> +
>>> +  elision_start = None
>>> +  try:
>>> +    # Elision start isn't supported by all models, but we'll not
>>> +    # generate the tsx_cycles_per_elision metric in that
>>> +    # case. Again, prefer the sysfs encoding of the event.
>>> +    elision_start = Event("HLE_RETIRED.START")
>>> +    elision_start = Event(f'{pmu}/el\-start/')
>>> +  except:
>>> +    pass
>>> +
>>> +  return MetricGroup('transaction', [
>>> +      Metric('tsx_transactional_cycles',
>>> +             'Percentage of cycles within a transaction region.',
>>> +             Select(cycles_in_tx / cycles, has_event(cycles_in_tx), 0),
>>> +             '100%'),
>>> +      Metric('tsx_aborted_cycles', 'Percentage of cycles in aborted transactions.',
>>> +             Select(max(cycles_in_tx - cycles_in_tx_cp, 0) / cycles,
>>> +                    has_event(cycles_in_tx),
>>> +                    0),
>>> +             '100%'),
>>> +      Metric('tsx_cycles_per_transaction',
>>> +             'Number of cycles within a transaction divided by the number of transactions.',
>>> +             Select(cycles_in_tx / transaction_start,
>>> +                    has_event(cycles_in_tx),
>>> +                    0),
>>> +             "cycles / transaction"),
>>> +      Metric('tsx_cycles_per_elision',
>>> +             'Number of cycles within a transaction divided by the number of elisions.',
>>> +             Select(cycles_in_tx / elision_start,
>>> +                    has_event(elision_start),
>>> +                    0),
>>> +             "cycles / elision") if elision_start else None,
>>> +  ], description="Breakdown of transactional memory statistics")
>>> +
>>> +
>>>  def main() -> None:
>>>    global _args
>>>
>>> @@ -100,6 +149,7 @@ def main() -> None:
>>>        Idle(),
>>>        Rapl(),
>>>        Smi(),
>>> +      Tsx(),
>>>    ])
>>>
>>>
>>
>