[RFC PATCH v6 4/5] perf stat: Add retire latency print functions to print out at the very end of print out

weilin.wang@intel.com posted 5 patches 1 year, 10 months ago
There is a newer version of this series
[RFC PATCH v6 4/5] perf stat: Add retire latency print functions to print out at the very end of print out
Posted by weilin.wang@intel.com 1 year, 10 months ago
From: Weilin Wang <weilin.wang@intel.com>

Add print functions so that users can read retire latency values.

Example output:
In this simple example, there is no MEM_INST_RETIRED.STLB_HIT_STORES sample.
Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency value, count
and sum are all 0.

 Performance counter stats for 'system wide':

       181,047,168      cpu_core/TOPDOWN.SLOTS/          #      0.6 %  tma_dtlb_store
         3,195,608      cpu_core/topdown-retiring/
        40,156,649      cpu_core/topdown-mem-bound/
         3,550,925      cpu_core/topdown-bad-spec/
       117,571,818      cpu_core/topdown-fe-bound/
        57,118,087      cpu_core/topdown-be-bound/
            69,179      cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/
             4,582      cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/
        30,183,104      cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/
        30,556,790      cpu_core/CPU_CLK_UNHALTED.THREAD/
           168,486      cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/
              0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p       0        0

       1.003105924 seconds time elapsed

Signed-off-by: Weilin Wang <weilin.wang@intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/stat-display.c | 65 ++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bfc1d705f437..6c043d9c9f81 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -21,6 +21,7 @@
 #include "iostat.h"
 #include "pmu.h"
 #include "pmus.h"
+#include "metricgroup.h"
 
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
@@ -34,6 +35,7 @@
 #define COMM_LEN     16
 #define PID_LEN       7
 #define CPUS_LEN      4
+#define RETIRE_LEN    8
 
 static int aggr_header_lens[] = {
 	[AGGR_CORE] 	= 18,
@@ -426,6 +428,67 @@ static void print_metric_std(struct perf_stat_config *config,
 	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
 }
 
+static void print_retire_lat_std(struct perf_stat_config *config,
+				 struct outstate *os)
+{
+	FILE *out = os->fh;
+	bool newline = os->newline;
+	struct tpebs_retire_lat *t;
+	struct list_head *retire_lats = &config->tpebs_results;
+
+	list_for_each_entry(t, retire_lats, event.nd) {
+		if (newline)
+			do_new_line_std(config, os);
+		fprintf(out, "%'*.2f %-*s", COUNTS_LEN, t->val, EVNAME_LEN, t->event.name);
+		fprintf(out, "%*ld %*d\n", RETIRE_LEN, t->count,
+			 RETIRE_LEN, t->sum);
+	}
+}
+
+static void print_retire_lat_csv(struct perf_stat_config *config,
+				 struct outstate *os)
+{
+	FILE *out = os->fh;
+	struct tpebs_retire_lat *t;
+	struct list_head *retire_lats = &config->tpebs_results;
+	const char *sep = config->csv_sep;
+
+	list_for_each_entry(t, retire_lats, event.nd) {
+		fprintf(out, "%f%s%s%s%s%ld%s%d\n", t->val, sep, sep, t->event.name, sep,
+			t->count, sep, t->sum);
+	}
+}
+
+static void print_retire_lat_json(struct perf_stat_config *config,
+				  struct outstate *os)
+{
+	FILE *out = os->fh;
+	struct tpebs_retire_lat *t;
+	struct list_head *retire_lats = &config->tpebs_results;
+
+	fprintf(out, "{");
+	list_for_each_entry(t, retire_lats, event.nd) {
+		fprintf(out, "\"retire_latency-value\" : \"%f\", ", t->val);
+		fprintf(out, "\"event-name\" : \"%s\"", t->event.name);
+		fprintf(out, "\"sample-counts\" : \"%ld\"", t->count);
+		fprintf(out, "\"retire_latency-sum\" : \"%d\"", t->sum);
+	}
+	fprintf(out, "}");
+}
+
+static void print_retire_lat(struct perf_stat_config *config,
+			     struct outstate *os)
+{
+	if (!&config->tpebs_results)
+		return;
+	if (config->json_output)
+		print_retire_lat_json(config, os);
+	else if (config->csv_output)
+		print_retire_lat_csv(config, os);
+	else
+		print_retire_lat_std(config, os);
+}
+
 static void new_line_csv(struct perf_stat_config *config, void *ctx)
 {
 	struct outstate *os = ctx;
@@ -1609,6 +1672,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 		break;
 	}
 
+	print_retire_lat(config, &os);
+
 	print_footer(config);
 
 	fflush(config->output);
-- 
2.43.0
Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to print out at the very end of print out
Posted by Namhyung Kim 1 year, 10 months ago
On Fri, Mar 29, 2024 at 12:12 PM <weilin.wang@intel.com> wrote:
>
> From: Weilin Wang <weilin.wang@intel.com>
>
> Add print out functions so that users could read retire latency values.
>
> Example output:
> In this simple example, there is no MEM_INST_RETIRED.STLB_HIT_STORES sample.
> Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency value, count
> and sum are all 0.
>
>  Performance counter stats for 'system wide':
>
>        181,047,168      cpu_core/TOPDOWN.SLOTS/          #      0.6 %  tma_dtlb_store
>          3,195,608      cpu_core/topdown-retiring/
>         40,156,649      cpu_core/topdown-mem-bound/
>          3,550,925      cpu_core/topdown-bad-spec/
>        117,571,818      cpu_core/topdown-fe-bound/
>         57,118,087      cpu_core/topdown-be-bound/
>             69,179      cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/
>              4,582      cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/
>         30,183,104      cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/
>         30,556,790      cpu_core/CPU_CLK_UNHALTED.THREAD/
>            168,486      cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/
>               0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p       0        0

The output is not aligned and I think it's hard to read.
I think it should print the result like this:

    <sum>  <event-name>  # <val>  average retired latency

Thanks,
Namhyung


>
>        1.003105924 seconds time elapsed
>
> Signed-off-by: Weilin Wang <weilin.wang@intel.com>
> Reviewed-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/util/stat-display.c | 65 ++++++++++++++++++++++++++++++++++
>  1 file changed, 65 insertions(+)
>
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index bfc1d705f437..6c043d9c9f81 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -21,6 +21,7 @@
>  #include "iostat.h"
>  #include "pmu.h"
>  #include "pmus.h"
> +#include "metricgroup.h"
>
>  #define CNTR_NOT_SUPPORTED     "<not supported>"
>  #define CNTR_NOT_COUNTED       "<not counted>"
> @@ -34,6 +35,7 @@
>  #define COMM_LEN     16
>  #define PID_LEN       7
>  #define CPUS_LEN      4
> +#define RETIRE_LEN    8
>
>  static int aggr_header_lens[] = {
>         [AGGR_CORE]     = 18,
> @@ -426,6 +428,67 @@ static void print_metric_std(struct perf_stat_config *config,
>         fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
>  }
>
> +static void print_retire_lat_std(struct perf_stat_config *config,
> +                                struct outstate *os)
> +{
> +       FILE *out = os->fh;
> +       bool newline = os->newline;
> +       struct tpebs_retire_lat *t;
> +       struct list_head *retire_lats = &config->tpebs_results;
> +
> +       list_for_each_entry(t, retire_lats, event.nd) {
> +               if (newline)
> +                       do_new_line_std(config, os);
> +               fprintf(out, "%'*.2f %-*s", COUNTS_LEN, t->val, EVNAME_LEN, t->event.name);
> +               fprintf(out, "%*ld %*d\n", RETIRE_LEN, t->count,
> +                        RETIRE_LEN, t->sum);
> +       }
> +}
> +
> +static void print_retire_lat_csv(struct perf_stat_config *config,
> +                                struct outstate *os)
> +{
> +       FILE *out = os->fh;
> +       struct tpebs_retire_lat *t;
> +       struct list_head *retire_lats = &config->tpebs_results;
> +       const char *sep = config->csv_sep;
> +
> +       list_for_each_entry(t, retire_lats, event.nd) {
> +               fprintf(out, "%f%s%s%s%s%ld%s%d\n", t->val, sep, sep, t->event.name, sep,
> +                       t->count, sep, t->sum);
> +       }
> +}
> +
> +static void print_retire_lat_json(struct perf_stat_config *config,
> +                                 struct outstate *os)
> +{
> +       FILE *out = os->fh;
> +       struct tpebs_retire_lat *t;
> +       struct list_head *retire_lats = &config->tpebs_results;
> +
> +       fprintf(out, "{");
> +       list_for_each_entry(t, retire_lats, event.nd) {
> +               fprintf(out, "\"retire_latency-value\" : \"%f\", ", t->val);
> +               fprintf(out, "\"event-name\" : \"%s\"", t->event.name);
> +               fprintf(out, "\"sample-counts\" : \"%ld\"", t->count);
> +               fprintf(out, "\"retire_latency-sum\" : \"%d\"", t->sum);
> +       }
> +       fprintf(out, "}");
> +}
> +
> +static void print_retire_lat(struct perf_stat_config *config,
> +                            struct outstate *os)
> +{
> +       if (!&config->tpebs_results)
> +               return;
> +       if (config->json_output)
> +               print_retire_lat_json(config, os);
> +       else if (config->csv_output)
> +               print_retire_lat_csv(config, os);
> +       else
> +               print_retire_lat_std(config, os);
> +}
> +
>  static void new_line_csv(struct perf_stat_config *config, void *ctx)
>  {
>         struct outstate *os = ctx;
> @@ -1609,6 +1672,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
>                 break;
>         }
>
> +       print_retire_lat(config, &os);
> +
>         print_footer(config);
>
>         fflush(config->output);
> --
> 2.43.0
>
RE: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to print out at the very end of print out
Posted by Wang, Weilin 1 year, 10 months ago

> -----Original Message-----
> From: Namhyung Kim <namhyung@kernel.org>
> Sent: Monday, April 1, 2024 2:04 PM
> To: Wang, Weilin <weilin.wang@intel.com>
> Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo
> <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar
> <mingo@redhat.com>; Alexander Shishkin
> <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter,
> Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>;
> linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, Perry
> <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; Biggers,
> Caleb <caleb.biggers@intel.com>
> Subject: Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to
> print out at the very end of print out
> 
> On Fri, Mar 29, 2024 at 12:12 PM <weilin.wang@intel.com> wrote:
> >
> > From: Weilin Wang <weilin.wang@intel.com>
> >
> > Add print out functions so that users could read retire latency values.
> >
> > Example output:
> > In this simple example, there is no MEM_INST_RETIRED.STLB_HIT_STORES
> sample.
> > Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency
> value, count
> > and sum are all 0.
> >
> >  Performance counter stats for 'system wide':
> >
> >        181,047,168      cpu_core/TOPDOWN.SLOTS/          #      0.6 %
> tma_dtlb_store
> >          3,195,608      cpu_core/topdown-retiring/
> >         40,156,649      cpu_core/topdown-mem-bound/
> >          3,550,925      cpu_core/topdown-bad-spec/
> >        117,571,818      cpu_core/topdown-fe-bound/
> >         57,118,087      cpu_core/topdown-be-bound/
> >             69,179      cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/
> >              4,582      cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/
> >         30,183,104      cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/
> >         30,556,790      cpu_core/CPU_CLK_UNHALTED.THREAD/
> >            168,486      cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/
> >               0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p       0        0
> 
> The output is not aligned and I think it's hard to read.
> I think it should print the result like this:
> 
>     <sum>  <event-name>  # <val>  average retired latency

Since we would like to use the average retire latency, I think putting the average
at the beginning would be more consistent. So, a format like:
<val> <event-name> <sum> <count> or <val> <event-name> <count> <sum>?

I will work on the alignment. 

Thanks, 
Weilin

> 
> Thanks,
> Namhyung
> 
> 
> >
> >        1.003105924 seconds time elapsed
> >
> > Signed-off-by: Weilin Wang <weilin.wang@intel.com>
> > Reviewed-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/util/stat-display.c | 65
> ++++++++++++++++++++++++++++++++++
> >  1 file changed, 65 insertions(+)
> >
> > diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> > index bfc1d705f437..6c043d9c9f81 100644
> > --- a/tools/perf/util/stat-display.c
> > +++ b/tools/perf/util/stat-display.c
> > @@ -21,6 +21,7 @@
> >  #include "iostat.h"
> >  #include "pmu.h"
> >  #include "pmus.h"
> > +#include "metricgroup.h"
> >
> >  #define CNTR_NOT_SUPPORTED     "<not supported>"
> >  #define CNTR_NOT_COUNTED       "<not counted>"
> > @@ -34,6 +35,7 @@
> >  #define COMM_LEN     16
> >  #define PID_LEN       7
> >  #define CPUS_LEN      4
> > +#define RETIRE_LEN    8
> >
> >  static int aggr_header_lens[] = {
> >         [AGGR_CORE]     = 18,
> > @@ -426,6 +428,67 @@ static void print_metric_std(struct
> perf_stat_config *config,
> >         fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
> >  }
> >
> > +static void print_retire_lat_std(struct perf_stat_config *config,
> > +                                struct outstate *os)
> > +{
> > +       FILE *out = os->fh;
> > +       bool newline = os->newline;
> > +       struct tpebs_retire_lat *t;
> > +       struct list_head *retire_lats = &config->tpebs_results;
> > +
> > +       list_for_each_entry(t, retire_lats, event.nd) {
> > +               if (newline)
> > +                       do_new_line_std(config, os);
> > +               fprintf(out, "%'*.2f %-*s", COUNTS_LEN, t->val, EVNAME_LEN, t-
> >event.name);
> > +               fprintf(out, "%*ld %*d\n", RETIRE_LEN, t->count,
> > +                        RETIRE_LEN, t->sum);
> > +       }
> > +}
> > +
> > +static void print_retire_lat_csv(struct perf_stat_config *config,
> > +                                struct outstate *os)
> > +{
> > +       FILE *out = os->fh;
> > +       struct tpebs_retire_lat *t;
> > +       struct list_head *retire_lats = &config->tpebs_results;
> > +       const char *sep = config->csv_sep;
> > +
> > +       list_for_each_entry(t, retire_lats, event.nd) {
> > +               fprintf(out, "%f%s%s%s%s%ld%s%d\n", t->val, sep, sep, t-
> >event.name, sep,
> > +                       t->count, sep, t->sum);
> > +       }
> > +}
> > +
> > +static void print_retire_lat_json(struct perf_stat_config *config,
> > +                                 struct outstate *os)
> > +{
> > +       FILE *out = os->fh;
> > +       struct tpebs_retire_lat *t;
> > +       struct list_head *retire_lats = &config->tpebs_results;
> > +
> > +       fprintf(out, "{");
> > +       list_for_each_entry(t, retire_lats, event.nd) {
> > +               fprintf(out, "\"retire_latency-value\" : \"%f\", ", t->val);
> > +               fprintf(out, "\"event-name\" : \"%s\"", t->event.name);
> > +               fprintf(out, "\"sample-counts\" : \"%ld\"", t->count);
> > +               fprintf(out, "\"retire_latency-sum\" : \"%d\"", t->sum);
> > +       }
> > +       fprintf(out, "}");
> > +}
> > +
> > +static void print_retire_lat(struct perf_stat_config *config,
> > +                            struct outstate *os)
> > +{
> > +       if (!&config->tpebs_results)
> > +               return;
> > +       if (config->json_output)
> > +               print_retire_lat_json(config, os);
> > +       else if (config->csv_output)
> > +               print_retire_lat_csv(config, os);
> > +       else
> > +               print_retire_lat_std(config, os);
> > +}
> > +
> >  static void new_line_csv(struct perf_stat_config *config, void *ctx)
> >  {
> >         struct outstate *os = ctx;
> > @@ -1609,6 +1672,8 @@ void evlist__print_counters(struct evlist *evlist,
> struct perf_stat_config *conf
> >                 break;
> >         }
> >
> > +       print_retire_lat(config, &os);
> > +
> >         print_footer(config);
> >
> >         fflush(config->output);
> > --
> > 2.43.0
> >
Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to print out at the very end of print out
Posted by Namhyung Kim 1 year, 10 months ago
On Mon, Apr 1, 2024 at 2:08 PM Wang, Weilin <weilin.wang@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Namhyung Kim <namhyung@kernel.org>
> > Sent: Monday, April 1, 2024 2:04 PM
> > To: Wang, Weilin <weilin.wang@intel.com>
> > Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo
> > <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar
> > <mingo@redhat.com>; Alexander Shishkin
> > <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter,
> > Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>;
> > linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, Perry
> > <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; Biggers,
> > Caleb <caleb.biggers@intel.com>
> > Subject: Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to
> > print out at the very end of print out
> >
> > On Fri, Mar 29, 2024 at 12:12 PM <weilin.wang@intel.com> wrote:
> > >
> > > From: Weilin Wang <weilin.wang@intel.com>
> > >
> > > Add print out functions so that users could read retire latency values.
> > >
> > > Example output:
> > > In this simple example, there is no MEM_INST_RETIRED.STLB_HIT_STORES
> > sample.
> > > Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency
> > value, count
> > > and sum are all 0.
> > >
> > >  Performance counter stats for 'system wide':
> > >
> > >        181,047,168      cpu_core/TOPDOWN.SLOTS/          #      0.6 %
> > tma_dtlb_store
> > >          3,195,608      cpu_core/topdown-retiring/
> > >         40,156,649      cpu_core/topdown-mem-bound/
> > >          3,550,925      cpu_core/topdown-bad-spec/
> > >        117,571,818      cpu_core/topdown-fe-bound/
> > >         57,118,087      cpu_core/topdown-be-bound/
> > >             69,179      cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/
> > >              4,582      cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/
> > >         30,183,104      cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/
> > >         30,556,790      cpu_core/CPU_CLK_UNHALTED.THREAD/
> > >            168,486      cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/
> > >               0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p       0        0
> >
> > The output is not aligned and I think it's hard to read.
> > I think it should print the result like this:
> >
> >     <sum>  <event-name>  # <val>  average retired latency
>
> Since we would like to use the average retire latency, I would think put average
> at the beginning would be more consistent. So in format like:
> <val> <event-name> <sum> <count> or <val> <event-name> <count> <sum> ?

But it's not consistent with the others.  When I see the perf stat
output, I'd expect it to show the total count.  And the average
latency is a derived value, so I think it can be treated as a metric.

Thanks,
Namhyung
RE: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to print out at the very end of print out
Posted by Wang, Weilin 1 year, 10 months ago

> -----Original Message-----
> From: Namhyung Kim <namhyung@kernel.org>
> Sent: Monday, April 1, 2024 2:15 PM
> To: Wang, Weilin <weilin.wang@intel.com>
> Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo
> <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar
> <mingo@redhat.com>; Alexander Shishkin
> <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter,
> Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>;
> linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, Perry
> <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; Biggers,
> Caleb <caleb.biggers@intel.com>
> Subject: Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to
> print out at the very end of print out
> 
> On Mon, Apr 1, 2024 at 2:08 PM Wang, Weilin <weilin.wang@intel.com>
> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Namhyung Kim <namhyung@kernel.org>
> > > Sent: Monday, April 1, 2024 2:04 PM
> > > To: Wang, Weilin <weilin.wang@intel.com>
> > > Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo
> > > <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar
> > > <mingo@redhat.com>; Alexander Shishkin
> > > <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter,
> > > Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>;
> > > linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor,
> Perry
> > > <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>;
> Biggers,
> > > Caleb <caleb.biggers@intel.com>
> > > Subject: Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print
> functions to
> > > print out at the very end of print out
> > >
> > > On Fri, Mar 29, 2024 at 12:12 PM <weilin.wang@intel.com> wrote:
> > > >
> > > > From: Weilin Wang <weilin.wang@intel.com>
> > > >
> > > > Add print out functions so that users could read retire latency values.
> > > >
> > > > Example output:
> > > > In this simple example, there is no
> MEM_INST_RETIRED.STLB_HIT_STORES
> > > sample.
> > > > Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency
> > > value, count
> > > > and sum are all 0.
> > > >
> > > >  Performance counter stats for 'system wide':
> > > >
> > > >        181,047,168      cpu_core/TOPDOWN.SLOTS/          #      0.6 %
> > > tma_dtlb_store
> > > >          3,195,608      cpu_core/topdown-retiring/
> > > >         40,156,649      cpu_core/topdown-mem-bound/
> > > >          3,550,925      cpu_core/topdown-bad-spec/
> > > >        117,571,818      cpu_core/topdown-fe-bound/
> > > >         57,118,087      cpu_core/topdown-be-bound/
> > > >             69,179      cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/
> > > >              4,582      cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/
> > > >         30,183,104      cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/
> > > >         30,556,790      cpu_core/CPU_CLK_UNHALTED.THREAD/
> > > >            168,486      cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/
> > > >               0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p       0        0
> > >
> > > The output is not aligned and I think it's hard to read.
> > > I think it should print the result like this:
> > >
> > >     <sum>  <event-name>  # <val>  average retired latency
> >
> > Since we would like to use the average retire latency, I would think put
> average
> > at the beginning would be more consistent. So in format like:
> > <val> <event-name> <sum> <count> or <val> <event-name> <count>
> <sum> ?
> 
> But it's not consistent with others.  When I see the perf stat
> output, I'd expect it shows the total count.  And the average
> latency is a derived value so I think it can be treated as a metric.

I think whether it is consistent or not depends on how we read this data.
If there is multiplexing happening, would the total count value of events be
the scaled counts or raw counts? If these are scaled counts, then these are
derived values as well. But we do expect the first column to show the value we
care most about in the row.

Thanks,
Weilin

> 
> Thanks,
> Namhyung