From: Weilin Wang <weilin.wang@intel.com>
Add print out functions so that users could read retire latency values.
Example output:
In this simple example, there is no MEM_INST_RETIRED.STLB_HIT_STORES sample.
Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency value, count
and sum are all 0.
Performance counter stats for 'system wide':
181,047,168 cpu_core/TOPDOWN.SLOTS/ # 0.6 % tma_dtlb_store
3,195,608 cpu_core/topdown-retiring/
40,156,649 cpu_core/topdown-mem-bound/
3,550,925 cpu_core/topdown-bad-spec/
117,571,818 cpu_core/topdown-fe-bound/
57,118,087 cpu_core/topdown-be-bound/
69,179 cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/
4,582 cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/
30,183,104 cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/
30,556,790 cpu_core/CPU_CLK_UNHALTED.THREAD/
168,486 cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/
0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p 0 0
1.003105924 seconds time elapsed
Signed-off-by: Weilin Wang <weilin.wang@intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
---
tools/perf/util/stat-display.c | 65 ++++++++++++++++++++++++++++++++++
1 file changed, 65 insertions(+)
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bfc1d705f437..6c043d9c9f81 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -21,6 +21,7 @@
#include "iostat.h"
#include "pmu.h"
#include "pmus.h"
+#include "metricgroup.h"
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
@@ -34,6 +35,7 @@
#define COMM_LEN 16
#define PID_LEN 7
#define CPUS_LEN 4
+#define RETIRE_LEN 8
static int aggr_header_lens[] = {
[AGGR_CORE] = 18,
@@ -426,6 +428,67 @@ static void print_metric_std(struct perf_stat_config *config,
fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
}
+static void print_retire_lat_std(struct perf_stat_config *config,
+ struct outstate *os)
+{
+ FILE *out = os->fh;
+ bool newline = os->newline;
+ struct tpebs_retire_lat *t;
+ struct list_head *retire_lats = &config->tpebs_results;
+
+ list_for_each_entry(t, retire_lats, event.nd) {
+ if (newline)
+ do_new_line_std(config, os);
+ fprintf(out, "%'*.2f %-*s", COUNTS_LEN, t->val, EVNAME_LEN, t->event.name);
+ fprintf(out, "%*ld %*d\n", RETIRE_LEN, t->count,
+ RETIRE_LEN, t->sum);
+ }
+}
+
+static void print_retire_lat_csv(struct perf_stat_config *config,
+ struct outstate *os)
+{
+ FILE *out = os->fh;
+ struct tpebs_retire_lat *t;
+ struct list_head *retire_lats = &config->tpebs_results;
+ const char *sep = config->csv_sep;
+
+ list_for_each_entry(t, retire_lats, event.nd) {
+ fprintf(out, "%f%s%s%s%s%ld%s%d\n", t->val, sep, sep, t->event.name, sep,
+ t->count, sep, t->sum);
+ }
+}
+
+static void print_retire_lat_json(struct perf_stat_config *config,
+ struct outstate *os)
+{
+ FILE *out = os->fh;
+ struct tpebs_retire_lat *t;
+ struct list_head *retire_lats = &config->tpebs_results;
+
+ fprintf(out, "{");
+ list_for_each_entry(t, retire_lats, event.nd) {
+ fprintf(out, "\"retire_latency-value\" : \"%f\", ", t->val);
+ fprintf(out, "\"event-name\" : \"%s\"", t->event.name);
+ fprintf(out, "\"sample-counts\" : \"%ld\"", t->count);
+ fprintf(out, "\"retire_latency-sum\" : \"%d\"", t->sum);
+ }
+ fprintf(out, "}");
+}
+
+static void print_retire_lat(struct perf_stat_config *config,
+ struct outstate *os)
+{
+ if (!&config->tpebs_results)
+ return;
+ if (config->json_output)
+ print_retire_lat_json(config, os);
+ else if (config->csv_output)
+ print_retire_lat_csv(config, os);
+ else
+ print_retire_lat_std(config, os);
+}
+
static void new_line_csv(struct perf_stat_config *config, void *ctx)
{
struct outstate *os = ctx;
@@ -1609,6 +1672,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
break;
}
+ print_retire_lat(config, &os);
+
print_footer(config);
fflush(config->output);
--
2.43.0
On Fri, Mar 29, 2024 at 12:12 PM <weilin.wang@intel.com> wrote:
>
> From: Weilin Wang <weilin.wang@intel.com>
>
> Add print out functions so that users could read retire latency values.
>
> Example output:
> In this simple example, there is no MEM_INST_RETIRED.STLB_HIT_STORES sample.
> Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency value, count
> and sum are all 0.
>
> Performance counter stats for 'system wide':
>
> 181,047,168 cpu_core/TOPDOWN.SLOTS/ # 0.6 % tma_dtlb_store
> 3,195,608 cpu_core/topdown-retiring/
> 40,156,649 cpu_core/topdown-mem-bound/
> 3,550,925 cpu_core/topdown-bad-spec/
> 117,571,818 cpu_core/topdown-fe-bound/
> 57,118,087 cpu_core/topdown-be-bound/
> 69,179 cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/
> 4,582 cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/
> 30,183,104 cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/
> 30,556,790 cpu_core/CPU_CLK_UNHALTED.THREAD/
> 168,486 cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/
> 0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p 0 0
The output is not aligned and I think it's hard to read.
I think it should print the result like this:
<sum> <event-name> # <val> average retired latency
Thanks,
Namhyung
>
> 1.003105924 seconds time elapsed
>
> Signed-off-by: Weilin Wang <weilin.wang@intel.com>
> Reviewed-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/util/stat-display.c | 65 ++++++++++++++++++++++++++++++++++
> 1 file changed, 65 insertions(+)
>
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index bfc1d705f437..6c043d9c9f81 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -21,6 +21,7 @@
> #include "iostat.h"
> #include "pmu.h"
> #include "pmus.h"
> +#include "metricgroup.h"
>
> #define CNTR_NOT_SUPPORTED "<not supported>"
> #define CNTR_NOT_COUNTED "<not counted>"
> @@ -34,6 +35,7 @@
> #define COMM_LEN 16
> #define PID_LEN 7
> #define CPUS_LEN 4
> +#define RETIRE_LEN 8
>
> static int aggr_header_lens[] = {
> [AGGR_CORE] = 18,
> @@ -426,6 +428,67 @@ static void print_metric_std(struct perf_stat_config *config,
> fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
> }
>
> +static void print_retire_lat_std(struct perf_stat_config *config,
> + struct outstate *os)
> +{
> + FILE *out = os->fh;
> + bool newline = os->newline;
> + struct tpebs_retire_lat *t;
> + struct list_head *retire_lats = &config->tpebs_results;
> +
> + list_for_each_entry(t, retire_lats, event.nd) {
> + if (newline)
> + do_new_line_std(config, os);
> + fprintf(out, "%'*.2f %-*s", COUNTS_LEN, t->val, EVNAME_LEN, t->event.name);
> + fprintf(out, "%*ld %*d\n", RETIRE_LEN, t->count,
> + RETIRE_LEN, t->sum);
> + }
> +}
> +
> +static void print_retire_lat_csv(struct perf_stat_config *config,
> + struct outstate *os)
> +{
> + FILE *out = os->fh;
> + struct tpebs_retire_lat *t;
> + struct list_head *retire_lats = &config->tpebs_results;
> + const char *sep = config->csv_sep;
> +
> + list_for_each_entry(t, retire_lats, event.nd) {
> + fprintf(out, "%f%s%s%s%s%ld%s%d\n", t->val, sep, sep, t->event.name, sep,
> + t->count, sep, t->sum);
> + }
> +}
> +
> +static void print_retire_lat_json(struct perf_stat_config *config,
> + struct outstate *os)
> +{
> + FILE *out = os->fh;
> + struct tpebs_retire_lat *t;
> + struct list_head *retire_lats = &config->tpebs_results;
> +
> + fprintf(out, "{");
> + list_for_each_entry(t, retire_lats, event.nd) {
> + fprintf(out, "\"retire_latency-value\" : \"%f\", ", t->val);
> + fprintf(out, "\"event-name\" : \"%s\"", t->event.name);
> + fprintf(out, "\"sample-counts\" : \"%ld\"", t->count);
> + fprintf(out, "\"retire_latency-sum\" : \"%d\"", t->sum);
> + }
> + fprintf(out, "}");
> +}
> +
> +static void print_retire_lat(struct perf_stat_config *config,
> + struct outstate *os)
> +{
> + if (!&config->tpebs_results)
> + return;
> + if (config->json_output)
> + print_retire_lat_json(config, os);
> + else if (config->csv_output)
> + print_retire_lat_csv(config, os);
> + else
> + print_retire_lat_std(config, os);
> +}
> +
> static void new_line_csv(struct perf_stat_config *config, void *ctx)
> {
> struct outstate *os = ctx;
> @@ -1609,6 +1672,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
> break;
> }
>
> + print_retire_lat(config, &os);
> +
> print_footer(config);
>
> fflush(config->output);
> --
> 2.43.0
>
> -----Original Message-----
> From: Namhyung Kim <namhyung@kernel.org>
> Sent: Monday, April 1, 2024 2:04 PM
> To: Wang, Weilin <weilin.wang@intel.com>
> Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo
> <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar
> <mingo@redhat.com>; Alexander Shishkin
> <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter,
> Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>;
> linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, Perry
> <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; Biggers,
> Caleb <caleb.biggers@intel.com>
> Subject: Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to
> print out at the very end of print out
>
> On Fri, Mar 29, 2024 at 12:12 PM <weilin.wang@intel.com> wrote:
> >
> > From: Weilin Wang <weilin.wang@intel.com>
> >
> > Add print out functions so that users could read retire latency values.
> >
> > Example output:
> > In this simple example, there is no MEM_INST_RETIRED.STLB_HIT_STORES
> sample.
> > Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency
> value, count
> > and sum are all 0.
> >
> > Performance counter stats for 'system wide':
> >
> > 181,047,168 cpu_core/TOPDOWN.SLOTS/ # 0.6 %
> tma_dtlb_store
> > 3,195,608 cpu_core/topdown-retiring/
> > 40,156,649 cpu_core/topdown-mem-bound/
> > 3,550,925 cpu_core/topdown-bad-spec/
> > 117,571,818 cpu_core/topdown-fe-bound/
> > 57,118,087 cpu_core/topdown-be-bound/
> > 69,179 cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/
> > 4,582 cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/
> > 30,183,104 cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/
> > 30,556,790 cpu_core/CPU_CLK_UNHALTED.THREAD/
> > 168,486 cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/
> > 0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p 0 0
>
> The output is not aligned and I think it's hard to read.
> I think it should print the result like this:
>
> <sum> <event-name> # <val> average retired latency
Since we would like to use the average retire latency, I would think put average
at the beginning would be more consistent. So in format like:
<val> <event-name> <sum> <count> or <val> <event-name> <count> <sum> ?
I will work on the alignment.
Thanks,
Weilin
>
> Thanks,
> Namhyung
>
>
> >
> > 1.003105924 seconds time elapsed
> >
> > Signed-off-by: Weilin Wang <weilin.wang@intel.com>
> > Reviewed-by: Ian Rogers <irogers@google.com>
> > ---
> > tools/perf/util/stat-display.c | 65
> ++++++++++++++++++++++++++++++++++
> > 1 file changed, 65 insertions(+)
> >
> > diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> > index bfc1d705f437..6c043d9c9f81 100644
> > --- a/tools/perf/util/stat-display.c
> > +++ b/tools/perf/util/stat-display.c
> > @@ -21,6 +21,7 @@
> > #include "iostat.h"
> > #include "pmu.h"
> > #include "pmus.h"
> > +#include "metricgroup.h"
> >
> > #define CNTR_NOT_SUPPORTED "<not supported>"
> > #define CNTR_NOT_COUNTED "<not counted>"
> > @@ -34,6 +35,7 @@
> > #define COMM_LEN 16
> > #define PID_LEN 7
> > #define CPUS_LEN 4
> > +#define RETIRE_LEN 8
> >
> > static int aggr_header_lens[] = {
> > [AGGR_CORE] = 18,
> > @@ -426,6 +428,67 @@ static void print_metric_std(struct
> perf_stat_config *config,
> > fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
> > }
> >
> > +static void print_retire_lat_std(struct perf_stat_config *config,
> > + struct outstate *os)
> > +{
> > + FILE *out = os->fh;
> > + bool newline = os->newline;
> > + struct tpebs_retire_lat *t;
> > + struct list_head *retire_lats = &config->tpebs_results;
> > +
> > + list_for_each_entry(t, retire_lats, event.nd) {
> > + if (newline)
> > + do_new_line_std(config, os);
> > + fprintf(out, "%'*.2f %-*s", COUNTS_LEN, t->val, EVNAME_LEN, t-
> >event.name);
> > + fprintf(out, "%*ld %*d\n", RETIRE_LEN, t->count,
> > + RETIRE_LEN, t->sum);
> > + }
> > +}
> > +
> > +static void print_retire_lat_csv(struct perf_stat_config *config,
> > + struct outstate *os)
> > +{
> > + FILE *out = os->fh;
> > + struct tpebs_retire_lat *t;
> > + struct list_head *retire_lats = &config->tpebs_results;
> > + const char *sep = config->csv_sep;
> > +
> > + list_for_each_entry(t, retire_lats, event.nd) {
> > + fprintf(out, "%f%s%s%s%s%ld%s%d\n", t->val, sep, sep, t-
> >event.name, sep,
> > + t->count, sep, t->sum);
> > + }
> > +}
> > +
> > +static void print_retire_lat_json(struct perf_stat_config *config,
> > + struct outstate *os)
> > +{
> > + FILE *out = os->fh;
> > + struct tpebs_retire_lat *t;
> > + struct list_head *retire_lats = &config->tpebs_results;
> > +
> > + fprintf(out, "{");
> > + list_for_each_entry(t, retire_lats, event.nd) {
> > + fprintf(out, "\"retire_latency-value\" : \"%f\", ", t->val);
> > + fprintf(out, "\"event-name\" : \"%s\"", t->event.name);
> > + fprintf(out, "\"sample-counts\" : \"%ld\"", t->count);
> > + fprintf(out, "\"retire_latency-sum\" : \"%d\"", t->sum);
> > + }
> > + fprintf(out, "}");
> > +}
> > +
> > +static void print_retire_lat(struct perf_stat_config *config,
> > + struct outstate *os)
> > +{
> > + if (!&config->tpebs_results)
> > + return;
> > + if (config->json_output)
> > + print_retire_lat_json(config, os);
> > + else if (config->csv_output)
> > + print_retire_lat_csv(config, os);
> > + else
> > + print_retire_lat_std(config, os);
> > +}
> > +
> > static void new_line_csv(struct perf_stat_config *config, void *ctx)
> > {
> > struct outstate *os = ctx;
> > @@ -1609,6 +1672,8 @@ void evlist__print_counters(struct evlist *evlist,
> struct perf_stat_config *conf
> > break;
> > }
> >
> > + print_retire_lat(config, &os);
> > +
> > print_footer(config);
> >
> > fflush(config->output);
> > --
> > 2.43.0
> >
On Mon, Apr 1, 2024 at 2:08 PM Wang, Weilin <weilin.wang@intel.com> wrote: > > > > > -----Original Message----- > > From: Namhyung Kim <namhyung@kernel.org> > > Sent: Monday, April 1, 2024 2:04 PM > > To: Wang, Weilin <weilin.wang@intel.com> > > Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo > > <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar > > <mingo@redhat.com>; Alexander Shishkin > > <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter, > > Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>; > > linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, Perry > > <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; Biggers, > > Caleb <caleb.biggers@intel.com> > > Subject: Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to > > print out at the very end of print out > > > > On Fri, Mar 29, 2024 at 12:12 PM <weilin.wang@intel.com> wrote: > > > > > > From: Weilin Wang <weilin.wang@intel.com> > > > > > > Add print out functions so that users could read retire latency values. > > > > > > Example output: > > > In this simple example, there is no MEM_INST_RETIRED.STLB_HIT_STORES > > sample. > > > Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency > > value, count > > > and sum are all 0. > > > > > > Performance counter stats for 'system wide': > > > > > > 181,047,168 cpu_core/TOPDOWN.SLOTS/ # 0.6 % > > tma_dtlb_store > > > 3,195,608 cpu_core/topdown-retiring/ > > > 40,156,649 cpu_core/topdown-mem-bound/ > > > 3,550,925 cpu_core/topdown-bad-spec/ > > > 117,571,818 cpu_core/topdown-fe-bound/ > > > 57,118,087 cpu_core/topdown-be-bound/ > > > 69,179 cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/ > > > 4,582 cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/ > > > 30,183,104 cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/ > > > 30,556,790 cpu_core/CPU_CLK_UNHALTED.THREAD/ > > > 168,486 cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/ > > > 0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p 0 0 > > > > The output is not aligned and I think it's hard to read. > > I think it should print the result like this: > > > > <sum> <event-name> # <val> average retired latency > > Since we would like to use the average retire latency, I would think put average > at the beginning would be more consistent. So in format like: > <val> <event-name> <sum> <count> or <val> <event-name> <count> <sum> ? But it's not consistent with others. When I see the perf stat output, I'd expect it shows the total count. And the average latency is a derived value so I think it can be treated as a metric. Thanks, Namhyung
> -----Original Message----- > From: Namhyung Kim <namhyung@kernel.org> > Sent: Monday, April 1, 2024 2:15 PM > To: Wang, Weilin <weilin.wang@intel.com> > Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo > <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar > <mingo@redhat.com>; Alexander Shishkin > <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter, > Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>; > linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, Perry > <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; Biggers, > Caleb <caleb.biggers@intel.com> > Subject: Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to > print out at the very end of print out > > On Mon, Apr 1, 2024 at 2:08 PM Wang, Weilin <weilin.wang@intel.com> > wrote: > > > > > > > > > -----Original Message----- > > > From: Namhyung Kim <namhyung@kernel.org> > > > Sent: Monday, April 1, 2024 2:04 PM > > > To: Wang, Weilin <weilin.wang@intel.com> > > > Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo > > > <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar > > > <mingo@redhat.com>; Alexander Shishkin > > > <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter, > > > Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>; > > > linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, > Perry > > > <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; > Biggers, > > > Caleb <caleb.biggers@intel.com> > > > Subject: Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print > functions to > > > print out at the very end of print out > > > > > > On Fri, Mar 29, 2024 at 12:12 PM <weilin.wang@intel.com> wrote: > > > > > > > > From: Weilin Wang <weilin.wang@intel.com> > > > > > > > > Add print out functions so that users could read retire latency values. > > > > > > > > Example output: > > > > In this simple example, there is no > MEM_INST_RETIRED.STLB_HIT_STORES > > > sample. > > > > Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency > > > value, count > > > > and sum are all 0. > > > > > > > > Performance counter stats for 'system wide': > > > > > > > > 181,047,168 cpu_core/TOPDOWN.SLOTS/ # 0.6 % > > > tma_dtlb_store > > > > 3,195,608 cpu_core/topdown-retiring/ > > > > 40,156,649 cpu_core/topdown-mem-bound/ > > > > 3,550,925 cpu_core/topdown-bad-spec/ > > > > 117,571,818 cpu_core/topdown-fe-bound/ > > > > 57,118,087 cpu_core/topdown-be-bound/ > > > > 69,179 cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/ > > > > 4,582 cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/ > > > > 30,183,104 cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/ > > > > 30,556,790 cpu_core/CPU_CLK_UNHALTED.THREAD/ > > > > 168,486 cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/ > > > > 0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p 0 0 > > > > > > The output is not aligned and I think it's hard to read. > > > I think it should print the result like this: > > > > > > <sum> <event-name> # <val> average retired latency > > > > Since we would like to use the average retire latency, I would think put > average > > at the beginning would be more consistent. So in format like: > > <val> <event-name> <sum> <count> or <val> <event-name> <count> > <sum> ? > > But it's not consistent with others. When I see the perf stat > output, I'd expect it shows the total count. And the average > latency is a derived value so I think it can be treated as a metric. I think whether it is consistent or not depends on how we read this data. If there is multiplexing happening, would the total count value of events be the scaled counts or raw counts? If these are scaled counts, then these are derived value as well. But we do expect the first column shows the value we care most from the row. Thanks, Weilin > > Thanks, > Namhyung
© 2016 - 2026 Red Hat, Inc.