[v1] Fix perf on Intel hybrid CPUs

[PATCH v1 03/40] perf vendor events intel: Add icelake metric constraints

Posted by Ian Rogers 2 years, 9 months ago

Previously these constraints were disabled as they contained topdown
events. Since:
https://lore.kernel.org/all/20230312021543.3060328-9-irogers@google.com/
the topdown events are correctly grouped even if no group exists.

This change was created by PR:
https://github.com/intel/perfmon/pull/71

Signed-off-by: Ian Rogers <irogers@google.com>
---
 .../perf/pmu-events/arch/x86/icelake/icl-metrics.json | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
index f45ae3483df4..cb58317860ea 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
@@ -311,6 +311,7 @@
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
@@ -413,6 +414,7 @@
     },
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_branch_misprediction_cost",
@@ -458,6 +460,7 @@
     },
     {
         "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
         "MetricGroup": "Cor;SMT",
         "MetricName": "tma_info_core_bound_likely",
@@ -510,6 +513,7 @@
     },
     {
         "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
         "MetricGroup": "DSBmiss;Fed;tma_issueFB",
         "MetricName": "tma_info_dsb_misses",
@@ -591,6 +595,7 @@
     },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
         "MetricGroup": "Fed;FetchBW;Frontend",
         "MetricName": "tma_info_instruction_fetch_bw",
@@ -929,6 +934,7 @@
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
         "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
         "MetricName": "tma_info_memory_data_tlbs",
@@ -937,6 +943,7 @@
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
         "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
         "MetricName": "tma_info_memory_latency",
@@ -945,6 +952,7 @@
     },
     {
         "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_mispredictions",
@@ -996,6 +1004,7 @@
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_retire"
@@ -1196,6 +1205,7 @@
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_memory_operations",
@@ -1266,6 +1276,7 @@
     },
     {
         "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_other_light_ops",
-- 
2.40.1.495.gc816e09b53d-goog

Re: [PATCH v1 03/40] perf vendor events intel: Add icelake metric constraints

Posted by Liang, Kan 2 years, 9 months ago


On 2023-04-26 3:00 a.m., Ian Rogers wrote:
> Previously these constraints were disabled as they contained topdown
> events. Since:
> https://lore.kernel.org/all/20230312021543.3060328-9-irogers@google.com/
> the topdown events are correctly grouped even if no group exists.
> 
> This change was created by PR:
> https://github.com/intel/perfmon/pull/71
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  .../perf/pmu-events/arch/x86/icelake/icl-metrics.json | 11 +++++++++++

Since this series targets fixing the hybrid issues, could you please move
the unrelated patches out of the series? A huge series is really hard to
review.


Thanks,
Kan

>  1 file changed, 11 insertions(+)
> 
> diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
> index f45ae3483df4..cb58317860ea 100644
> --- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
> +++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
> @@ -311,6 +311,7 @@
>      },
>      {
>          "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
>          "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
>          "MetricName": "tma_fp_arith",
> @@ -413,6 +414,7 @@
>      },
>      {
>          "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
>          "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
>          "MetricName": "tma_info_branch_misprediction_cost",
> @@ -458,6 +460,7 @@
>      },
>      {
>          "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
>          "MetricGroup": "Cor;SMT",
>          "MetricName": "tma_info_core_bound_likely",
> @@ -510,6 +513,7 @@
>      },
>      {
>          "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
>          "MetricGroup": "DSBmiss;Fed;tma_issueFB",
>          "MetricName": "tma_info_dsb_misses",
> @@ -591,6 +595,7 @@
>      },
>      {
>          "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
>          "MetricGroup": "Fed;FetchBW;Frontend",
>          "MetricName": "tma_info_instruction_fetch_bw",
> @@ -929,6 +934,7 @@
>      },
>      {
>          "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
>          "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
>          "MetricName": "tma_info_memory_data_tlbs",
> @@ -937,6 +943,7 @@
>      },
>      {
>          "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
>          "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
>          "MetricName": "tma_info_memory_latency",
> @@ -945,6 +952,7 @@
>      },
>      {
>          "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
>          "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
>          "MetricName": "tma_info_mispredictions",
> @@ -996,6 +1004,7 @@
>      },
>      {
>          "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "tma_retiring * tma_info_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
>          "MetricGroup": "Pipeline;Ret",
>          "MetricName": "tma_info_retire"
> @@ -1196,6 +1205,7 @@
>      },
>      {
>          "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY",
>          "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
>          "MetricName": "tma_memory_operations",
> @@ -1266,6 +1276,7 @@
>      },
>      {
>          "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
> +        "MetricConstraint": "NO_GROUP_EVENTS",
>          "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
>          "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
>          "MetricName": "tma_other_light_ops",

Re: [PATCH v1 03/40] perf vendor events intel: Add icelake metric constraints

Posted by Ian Rogers 2 years, 9 months ago

On Thu, Apr 27, 2023 at 12:06 PM Liang, Kan <kan.liang@linux.intel.com> wrote:
>
>
>
> On 2023-04-26 3:00 a.m., Ian Rogers wrote:
> > Previously these constraints were disabled as they contained topdown
> > events. Since:
> > https://lore.kernel.org/all/20230312021543.3060328-9-irogers@google.com/
> > the topdown events are correctly grouped even if no group exists.
> >
> > This change was created by PR:
> > https://github.com/intel/perfmon/pull/71
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  .../perf/pmu-events/arch/x86/icelake/icl-metrics.json | 11 +++++++++++
>
> Since this series targets fixing the hybrid issues, could you please move
> the unrelated patches out of the series? A huge series is really hard to
> review.

I have done so. The independent patches are at the front of the series,
while the dependent ones are in the later patches. This is covered in the
cover letter.

Thanks,
Ian

> Thanks,
> Kan
>
> >  1 file changed, 11 insertions(+)
> >
> > diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
> > index f45ae3483df4..cb58317860ea 100644
> > --- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
> > +++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
> > @@ -311,6 +311,7 @@
> >      },
> >      {
> >          "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
> >          "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
> >          "MetricName": "tma_fp_arith",
> > @@ -413,6 +414,7 @@
> >      },
> >      {
> >          "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
> >          "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
> >          "MetricName": "tma_info_branch_misprediction_cost",
> > @@ -458,6 +460,7 @@
> >      },
> >      {
> >          "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
> >          "MetricGroup": "Cor;SMT",
> >          "MetricName": "tma_info_core_bound_likely",
> > @@ -510,6 +513,7 @@
> >      },
> >      {
> >          "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
> >          "MetricGroup": "DSBmiss;Fed;tma_issueFB",
> >          "MetricName": "tma_info_dsb_misses",
> > @@ -591,6 +595,7 @@
> >      },
> >      {
> >          "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
> >          "MetricGroup": "Fed;FetchBW;Frontend",
> >          "MetricName": "tma_info_instruction_fetch_bw",
> > @@ -929,6 +934,7 @@
> >      },
> >      {
> >          "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
> >          "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
> >          "MetricName": "tma_info_memory_data_tlbs",
> > @@ -937,6 +943,7 @@
> >      },
> >      {
> >          "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
> >          "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
> >          "MetricName": "tma_info_memory_latency",
> > @@ -945,6 +952,7 @@
> >      },
> >      {
> >          "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
> >          "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
> >          "MetricName": "tma_info_mispredictions",
> > @@ -996,6 +1004,7 @@
> >      },
> >      {
> >          "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "tma_retiring * tma_info_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
> >          "MetricGroup": "Pipeline;Ret",
> >          "MetricName": "tma_info_retire"
> > @@ -1196,6 +1205,7 @@
> >      },
> >      {
> >          "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY",
> >          "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
> >          "MetricName": "tma_memory_operations",
> > @@ -1266,6 +1276,7 @@
> >      },
> >      {
> >          "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
> > +        "MetricConstraint": "NO_GROUP_EVENTS",
> >          "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
> >          "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
> >          "MetricName": "tma_other_light_ops",