[RFC PATCH v7 5/6] perf stat: Add retire latency print functions to print out at the very end of print out

weilin.wang@intel.com posted 6 patches 1 year, 10 months ago
There is a newer version of this series
[RFC PATCH v7 5/6] perf stat: Add retire latency print functions to print out at the very end of print out
Posted by weilin.wang@intel.com 1 year, 10 months ago
From: Weilin Wang <weilin.wang@intel.com>

Add print functions so that users can read retire latency values.

Example output:

 Performance counter stats for 'system wide':

            25,717      MEM_INST_RETIRED.SPLIT_STORES    #      2.2 %  tma_split_stores
        28,365,080      CPU_CLK_UNHALTED.THREAD
             24.00      MEM_INST_RETIRED.SPLIT_STORES:p  #       96        4

       2.054365083 seconds time elapsed

The retire latency data is printed in the following format:
 <val> <event-name> # <sum> <count>.

Signed-off-by: Weilin Wang <weilin.wang@intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/stat-display.c | 69 ++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bfc1d705f437..c32d484b53a9 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -21,6 +21,7 @@
 #include "iostat.h"
 #include "pmu.h"
 #include "pmus.h"
+#include "metricgroup.h"
 
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
@@ -34,6 +35,7 @@
 #define COMM_LEN     16
 #define PID_LEN       7
 #define CPUS_LEN      4
+#define RETIRE_LEN    8
 
 static int aggr_header_lens[] = {
 	[AGGR_CORE] 	= 18,
@@ -426,6 +428,71 @@ static void print_metric_std(struct perf_stat_config *config,
 	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
 }
 
+/*
+ * Print "<val> <event-name> # <sum> <count>" for each tpebs_results entry.
+ */
+static void print_retire_lat_std(struct perf_stat_config *config,
+				 struct outstate *os)
+{
+	struct tpebs_retire_lat *lat;
+	bool want_newline = os->newline;
+	FILE *fp = os->fh;
+
+	list_for_each_entry(lat, &config->tpebs_results, event.nd) {
+		if (want_newline)
+			do_new_line_std(config, os);
+		/* The empty "" argument pads the unit column for alignment. */
+		fprintf(fp, "%'*.2f %-*s %-*s # %*d %*ld\n",
+			COUNTS_LEN, lat->val, config->unit_width, "",
+			EVNAME_LEN, lat->event.name,
+			RETIRE_LEN, lat->sum, RETIRE_LEN, lat->count);
+	}
+}
+
+/* Print one CSV row per tpebs_results entry: val, empty field, name, count, sum. */
+static void print_retire_lat_csv(struct perf_stat_config *config,
+				 struct outstate *os)
+{
+	const char *delim = config->csv_sep;
+	struct tpebs_retire_lat *lat;
+	FILE *fp = os->fh;
+
+	/* Two consecutive separators after val leave the second column empty. */
+	list_for_each_entry(lat, &config->tpebs_results, event.nd)
+		fprintf(fp, "%f%s%s%s%s%ld%s%d\n", lat->val, delim, delim,
+			lat->event.name, delim, lat->count, delim, lat->sum);
+}
+
+/* Print each tpebs_results entry as its own JSON object, one object per line. */
+static void print_retire_lat_json(struct perf_stat_config *config,
+				  struct outstate *os)
+{
+	FILE *out = os->fh;
+	struct tpebs_retire_lat *t;
+
+	list_for_each_entry(t, &config->tpebs_results, event.nd) {
+		/* Per-entry braces and ", " between members keep the output valid JSON. */
+		fprintf(out, "{\"retire_latency-value\" : \"%f\", ", t->val);
+		fprintf(out, "\"event-name\" : \"%s\", ", t->event.name);
+		fprintf(out, "\"sample-counts\" : \"%ld\", ", t->count);
+		fprintf(out, "\"retire_latency-sum\" : \"%d\"}\n", t->sum);
+	}
+}
+
+/* Print retire latency results in the configured format; no-op if none exist. */
+static void print_retire_lat(struct perf_stat_config *config, struct outstate *os)
+{
+	if (list_empty(&config->tpebs_results))
+		return;
+	if (config->json_output)
+		print_retire_lat_json(config, os);
+	else if (config->csv_output)
+		print_retire_lat_csv(config, os);
+	else
+		print_retire_lat_std(config, os);
+}
+
 static void new_line_csv(struct perf_stat_config *config, void *ctx)
 {
 	struct outstate *os = ctx;
@@ -1609,6 +1676,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 		break;
 	}
 
+	print_retire_lat(config, &os);
+
 	print_footer(config);
 
 	fflush(config->output);
-- 
2.43.0
Re: [RFC PATCH v7 5/6] perf stat: Add retire latency print functions to print out at the very end of print out
Posted by Namhyung Kim 1 year, 9 months ago
On Tue, Apr 2, 2024 at 2:46 PM <weilin.wang@intel.com> wrote:
>
> From: Weilin Wang <weilin.wang@intel.com>
>
> Add print out functions so that users could read retire latency values.
>
> Example output:
>
>  Performance counter stats for 'system wide':
>
>             25,717      MEM_INST_RETIRED.SPLIT_STORES    #      2.2 %  tma_split_stores
>         28,365,080      CPU_CLK_UNHALTED.THREAD
>              24.00      MEM_INST_RETIRED.SPLIT_STORES:p  #       96        4
>
>        2.054365083 seconds time elapsed
>
> This output of retire latency data is in format:
>  <val> <event-name> # <sum> <count>.

Nop, please follow the perf stat output format.  It'd be

  <sum> <event-name> # ...

Hmm.. maybe you don't need this at all as it'll have the event
anyway (probably without the 'p' modifier like in the example
above).

Then I think you can just add a metric value saying the average
latency in the comment area.

  Performance counter stats for 'system wide':

             25,717      MEM_INST_RETIRED.SPLIT_STORES    #      2.2 %
 tma_split_stores
                                                                     #
     24.0  average latency
       28,365,080      CPU_CLK_UNHALTED.THREAD

Thanks,
Namhyung

>
> Signed-off-by: Weilin Wang <weilin.wang@intel.com>
> Reviewed-by: Ian Rogers <irogers@google.com>
Re: [RFC PATCH v7 5/6] perf stat: Add retire latency print functions to print out at the very end of print out
Posted by Namhyung Kim 1 year, 9 months ago
On Mon, Apr 22, 2024 at 12:55 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Tue, Apr 2, 2024 at 2:46 PM <weilin.wang@intel.com> wrote:
> >
> > From: Weilin Wang <weilin.wang@intel.com>
> >
> > Add print out functions so that users could read retire latency values.
> >
> > Example output:
> >
> >  Performance counter stats for 'system wide':
> >
> >             25,717      MEM_INST_RETIRED.SPLIT_STORES    #      2.2 %  tma_split_stores
> >         28,365,080      CPU_CLK_UNHALTED.THREAD
> >              24.00      MEM_INST_RETIRED.SPLIT_STORES:p  #       96        4
> >
> >        2.054365083 seconds time elapsed
> >
> > This output of retire latency data is in format:
> >  <val> <event-name> # <sum> <count>.
>
> Nop, please follow the perf stat output format.  It'd be
>
>   <sum> <event-name> # ...
>
> Hmm.. maybe you don't need this at all as it'll have the event
> anyway (probably without the 'p' modifier like in the example
> above).
>
> Then I think you can just add a metric value saying the average
> latency in the comment area.
>
>   Performance counter stats for 'system wide':
>
>              25,717      MEM_INST_RETIRED.SPLIT_STORES    #      2.2 %
>  tma_split_stores
>                                                                      #
>      24.0  average latency

Sorry for the line wrapping.  They should be in the same line.

Thanks,
Namhyung