[PATCH v2 1/3] perf sample: Remove arch notion of sample parsing

Ian Rogers posted 3 patches 6 months, 3 weeks ago
There is a newer version of this series
[PATCH v2 1/3] perf sample: Remove arch notion of sample parsing
Posted by Ian Rogers 6 months, 3 weeks ago
By definition arch sample parsing and synthesis will inhibit certain
kinds of cross-platform record then analysis (report, script,
etc.). Remove arch_perf_parse_sample_weight and
arch_perf_synthesize_sample_weight replacing with a common
implementation. Combine perf_sample p_stage_cyc and retire_lat to
capture the differing uses regardless of compiled for architecture.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/arch/powerpc/util/event.c       | 26 ---------------------
 tools/perf/arch/x86/tests/sample-parsing.c |  4 ++--
 tools/perf/arch/x86/util/event.c           | 27 ----------------------
 tools/perf/builtin-script.c                |  2 +-
 tools/perf/util/dlfilter.c                 |  2 +-
 tools/perf/util/event.h                    |  2 --
 tools/perf/util/evsel.c                    | 17 ++++++++++----
 tools/perf/util/hist.c                     |  4 ++--
 tools/perf/util/hist.h                     |  2 +-
 tools/perf/util/intel-tpebs.c              |  4 ++--
 tools/perf/util/sample.h                   |  5 +---
 tools/perf/util/session.c                  |  2 +-
 tools/perf/util/sort.c                     |  6 ++---
 tools/perf/util/synthetic-events.c         | 10 ++++++--
 14 files changed, 34 insertions(+), 79 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
index 77d8cc2b5691..024ac8b54c33 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -11,32 +11,6 @@
 #include "../../../util/debug.h"
 #include "../../../util/sample.h"
 
-void arch_perf_parse_sample_weight(struct perf_sample *data,
-				   const __u64 *array, u64 type)
-{
-	union perf_sample_weight weight;
-
-	weight.full = *array;
-	if (type & PERF_SAMPLE_WEIGHT)
-		data->weight = weight.full;
-	else {
-		data->weight = weight.var1_dw;
-		data->ins_lat = weight.var2_w;
-		data->p_stage_cyc = weight.var3_w;
-	}
-}
-
-void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
-					__u64 *array, u64 type)
-{
-	*array = data->weight;
-
-	if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
-		*array &= 0xffffffff;
-		*array |= ((u64)data->ins_lat << 32);
-	}
-}
-
 const char *arch_perf_header_entry(const char *se_header)
 {
 	if (!strcmp(se_header, "Local INSTR Latency"))
diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
index a061e8619267..95d8f7f1d2fb 100644
--- a/tools/perf/arch/x86/tests/sample-parsing.c
+++ b/tools/perf/arch/x86/tests/sample-parsing.c
@@ -29,7 +29,7 @@ static bool samples_same(const struct perf_sample *s1,
 {
 	if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
 		COMP(ins_lat);
-		COMP(retire_lat);
+		COMP(p_stage_cyc_or_retire_lat);
 	}
 
 	return true;
@@ -50,7 +50,7 @@ static int do_test(u64 sample_type)
 	struct perf_sample sample = {
 		.weight		= 101,
 		.ins_lat        = 102,
-		.retire_lat     = 103,
+		.p_stage_cyc_or_retire_lat = 103,
 	};
 	struct perf_sample sample_out;
 	size_t i, sz, bufsz;
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index a0400707180c..576c1c36046c 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -92,33 +92,6 @@ int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool,
 
 #endif
 
-void arch_perf_parse_sample_weight(struct perf_sample *data,
-				   const __u64 *array, u64 type)
-{
-	union perf_sample_weight weight;
-
-	weight.full = *array;
-	if (type & PERF_SAMPLE_WEIGHT)
-		data->weight = weight.full;
-	else {
-		data->weight = weight.var1_dw;
-		data->ins_lat = weight.var2_w;
-		data->retire_lat = weight.var3_w;
-	}
-}
-
-void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
-					__u64 *array, u64 type)
-{
-	*array = data->weight;
-
-	if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
-		*array &= 0xffffffff;
-		*array |= ((u64)data->ins_lat << 32);
-		*array |= ((u64)data->retire_lat << 48);
-	}
-}
-
 const char *arch_perf_header_entry(const char *se_header)
 {
 	if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 6c3bf74dd78c..c02c435e0f0b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2251,7 +2251,7 @@ static void process_event(struct perf_script *script,
 		fprintf(fp, "%16" PRIu16, sample->ins_lat);
 
 	if (PRINT_FIELD(RETIRE_LAT))
-		fprintf(fp, "%16" PRIu16, sample->retire_lat);
+		fprintf(fp, "%16" PRIu16, sample->p_stage_cyc_or_retire_lat);
 
 	if (PRINT_FIELD(CGROUP)) {
 		const char *cgrp_name;
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
index ddacef881af2..7e61ddfa66b8 100644
--- a/tools/perf/util/dlfilter.c
+++ b/tools/perf/util/dlfilter.c
@@ -526,7 +526,7 @@ int dlfilter__do_filter_event(struct dlfilter *d,
 	ASSIGN(period);
 	ASSIGN(weight);
 	ASSIGN(ins_lat);
-	ASSIGN(p_stage_cyc);
+	d_sample.p_stage_cyc = sample->p_stage_cyc_or_retire_lat;
 	ASSIGN(transaction);
 	ASSIGN(insn_cnt);
 	ASSIGN(cyc_cnt);
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 664bf39567ce..119bce37f4fd 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -390,8 +390,6 @@ extern unsigned int proc_map_timeout;
 #define PAGE_SIZE_NAME_LEN	32
 char *get_page_size_name(u64 size, char *str);
 
-void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type);
-void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type);
 const char *arch_perf_header_entry(const char *se_header);
 int arch_support_sort_key(const char *sort_key);
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d55482f094bf..097ab98bb81a 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2846,11 +2846,18 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size)
 	return 0;
 }
 
-void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
-					  const __u64 *array,
-					  u64 type __maybe_unused)
+static void perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type)
 {
-	data->weight = *array;
+	union perf_sample_weight weight;
+
+	weight.full = *array;
+	if (type & PERF_SAMPLE_WEIGHT) {
+		data->weight = weight.full;
+	} else {
+		data->weight = weight.var1_dw;
+		data->ins_lat = weight.var2_w;
+		data->p_stage_cyc_or_retire_lat = weight.var3_w;
+	}
 }
 
 u64 evsel__bitfield_swap_branch_flags(u64 value)
@@ -3236,7 +3243,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 
 	if (type & PERF_SAMPLE_WEIGHT_TYPE) {
 		OVERFLOW_CHECK_u64(array);
-		arch_perf_parse_sample_weight(data, array, type);
+		perf_parse_sample_weight(data, array, type);
 		array++;
 	}
 
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index afc6855327ab..ae9803dca0b1 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -829,7 +829,7 @@ __hists__add_entry(struct hists *hists,
 			.period	= sample->period,
 			.weight1 = sample->weight,
 			.weight2 = sample->ins_lat,
-			.weight3 = sample->p_stage_cyc,
+			.weight3 = sample->p_stage_cyc_or_retire_lat,
 			.latency = al->latency,
 		},
 		.parent = sym_parent,
@@ -846,7 +846,7 @@ __hists__add_entry(struct hists *hists,
 		.time = hist_time(sample->time),
 		.weight = sample->weight,
 		.ins_lat = sample->ins_lat,
-		.p_stage_cyc = sample->p_stage_cyc,
+		.p_stage_cyc_or_retire_lat = sample->p_stage_cyc_or_retire_lat,
 		.simd_flags = sample->simd_flags,
 	}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index c64254088fc7..67033bdabcf4 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -255,7 +255,7 @@ struct hist_entry {
 	u64			code_page_size;
 	u64			weight;
 	u64			ins_lat;
-	u64			p_stage_cyc;
+	u64			p_stage_cyc_or_retire_lat;
 	s32			socket;
 	s32			cpu;
 	int			parallelism;
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
index 4ad4bc118ea5..ec2f3ecf1e1c 100644
--- a/tools/perf/util/intel-tpebs.c
+++ b/tools/perf/util/intel-tpebs.c
@@ -202,8 +202,8 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
 	 * latency value will be used. Save the number of samples and the sum of
 	 * retire latency value for each event.
 	 */
-	t->last = sample->retire_lat;
-	update_stats(&t->stats, sample->retire_lat);
+	t->last = sample->p_stage_cyc_or_retire_lat;
+	update_stats(&t->stats, sample->p_stage_cyc_or_retire_lat);
 	mutex_unlock(tpebs_mtx_get());
 	return 0;
 }
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 0e96240052e9..3330d18fb5fd 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -104,10 +104,7 @@ struct perf_sample {
 	u8  cpumode;
 	u16 misc;
 	u16 ins_lat;
-	union {
-		u16 p_stage_cyc;
-		u16 retire_lat;
-	};
+	u16 p_stage_cyc_or_retire_lat;
 	bool no_hw_idx;		/* No hw_idx collected in branch_stack */
 	char insn[MAX_INSN];
 	void *raw_data;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index a320672c264e..451bc24ccfba 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1094,7 +1094,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
 		printf("... weight: %" PRIu64 "", sample->weight);
 			if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
 				printf(",0x%"PRIx16"", sample->ins_lat);
-				printf(",0x%"PRIx16"", sample->p_stage_cyc);
+				printf(",0x%"PRIx16"", sample->p_stage_cyc_or_retire_lat);
 			}
 		printf("\n");
 	}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 45e654653960..dda4ef0b5a73 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1879,21 +1879,21 @@ struct sort_entry sort_global_ins_lat = {
 static int64_t
 sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	return left->p_stage_cyc - right->p_stage_cyc;
+	return left->p_stage_cyc_or_retire_lat - right->p_stage_cyc_or_retire_lat;
 }
 
 static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
 					size_t size, unsigned int width)
 {
 	return repsep_snprintf(bf, size, "%-*u", width,
-			he->p_stage_cyc * he->stat.nr_events);
+			he->p_stage_cyc_or_retire_lat * he->stat.nr_events);
 }
 
 
 static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
 					size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
+	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc_or_retire_lat);
 }
 
 struct sort_entry sort_local_p_stage_cyc = {
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 2fc4d0537840..449a41900fc4 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1567,10 +1567,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
 	return result;
 }
 
-void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+static void perf_synthesize_sample_weight(const struct perf_sample *data,
 					       __u64 *array, u64 type __maybe_unused)
 {
 	*array = data->weight;
+
+	if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
+		*array &= 0xffffffff;
+		*array |= ((u64)data->ins_lat << 32);
+		*array |= ((u64)data->p_stage_cyc_or_retire_lat << 48);
+	}
 }
 
 static __u64 *copy_read_group_values(__u64 *array, __u64 read_format,
@@ -1730,7 +1736,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
 	}
 
 	if (type & PERF_SAMPLE_WEIGHT_TYPE) {
-		arch_perf_synthesize_sample_weight(sample, array, type);
+		perf_synthesize_sample_weight(sample, array, type);
 		array++;
 	}
 
-- 
2.49.0.1112.g889b7c5bd8-goog
Re: [PATCH v2 1/3] perf sample: Remove arch notion of sample parsing
Posted by Liang, Kan 6 months, 3 weeks ago

On 2025-05-21 9:54 a.m., Ian Rogers wrote:
> By definition arch sample parsing and synthesis will inhibit certain
> kinds of cross-platform record then analysis (report, script,
> etc.). Remove arch_perf_parse_sample_weight and
> arch_perf_synthesize_sample_weight replacing with a common
> implementation. Combine perf_sample p_stage_cyc and retire_lat to
> capture the differing uses regardless of compiled for architecture.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/arch/powerpc/util/event.c       | 26 ---------------------
>  tools/perf/arch/x86/tests/sample-parsing.c |  4 ++--
>  tools/perf/arch/x86/util/event.c           | 27 ----------------------
>  tools/perf/builtin-script.c                |  2 +-
>  tools/perf/util/dlfilter.c                 |  2 +-
>  tools/perf/util/event.h                    |  2 --
>  tools/perf/util/evsel.c                    | 17 ++++++++++----
>  tools/perf/util/hist.c                     |  4 ++--
>  tools/perf/util/hist.h                     |  2 +-
>  tools/perf/util/intel-tpebs.c              |  4 ++--
>  tools/perf/util/sample.h                   |  5 +---
>  tools/perf/util/session.c                  |  2 +-
>  tools/perf/util/sort.c                     |  6 ++---
>  tools/perf/util/synthetic-events.c         | 10 ++++++--
>  14 files changed, 34 insertions(+), 79 deletions(-)
> 
> diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
> index 77d8cc2b5691..024ac8b54c33 100644
> --- a/tools/perf/arch/powerpc/util/event.c
> +++ b/tools/perf/arch/powerpc/util/event.c
> @@ -11,32 +11,6 @@
>  #include "../../../util/debug.h"
>  #include "../../../util/sample.h"
>  
> -void arch_perf_parse_sample_weight(struct perf_sample *data,
> -				   const __u64 *array, u64 type)
> -{
> -	union perf_sample_weight weight;
> -
> -	weight.full = *array;
> -	if (type & PERF_SAMPLE_WEIGHT)
> -		data->weight = weight.full;
> -	else {
> -		data->weight = weight.var1_dw;
> -		data->ins_lat = weight.var2_w;
> -		data->p_stage_cyc = weight.var3_w;
> -	}
> -}
> -
> -void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
> -					__u64 *array, u64 type)
> -{
> -	*array = data->weight;
> -
> -	if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
> -		*array &= 0xffffffff;
> -		*array |= ((u64)data->ins_lat << 32);
> -	}
> -}
> -
>  const char *arch_perf_header_entry(const char *se_header)
>  {
>  	if (!strcmp(se_header, "Local INSTR Latency"))
> diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
> index a061e8619267..95d8f7f1d2fb 100644
> --- a/tools/perf/arch/x86/tests/sample-parsing.c
> +++ b/tools/perf/arch/x86/tests/sample-parsing.c
> @@ -29,7 +29,7 @@ static bool samples_same(const struct perf_sample *s1,
>  {
>  	if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
>  		COMP(ins_lat);
> -		COMP(retire_lat);
> +		COMP(p_stage_cyc_or_retire_lat);
>  	}
>  
>  	return true;
> @@ -50,7 +50,7 @@ static int do_test(u64 sample_type)
>  	struct perf_sample sample = {
>  		.weight		= 101,
>  		.ins_lat        = 102,
> -		.retire_lat     = 103,
> +		.p_stage_cyc_or_retire_lat = 103,
>  	};
>  	struct perf_sample sample_out;
>  	size_t i, sz, bufsz;
> diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
> index a0400707180c..576c1c36046c 100644
> --- a/tools/perf/arch/x86/util/event.c
> +++ b/tools/perf/arch/x86/util/event.c
> @@ -92,33 +92,6 @@ int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool,
>  
>  #endif
>  
> -void arch_perf_parse_sample_weight(struct perf_sample *data,
> -				   const __u64 *array, u64 type)
> -{
> -	union perf_sample_weight weight;
> -
> -	weight.full = *array;
> -	if (type & PERF_SAMPLE_WEIGHT)
> -		data->weight = weight.full;
> -	else {
> -		data->weight = weight.var1_dw;
> -		data->ins_lat = weight.var2_w;
> -		data->retire_lat = weight.var3_w;
> -	}
> -}
> -
> -void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
> -					__u64 *array, u64 type)
> -{
> -	*array = data->weight;
> -
> -	if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
> -		*array &= 0xffffffff;
> -		*array |= ((u64)data->ins_lat << 32);
> -		*array |= ((u64)data->retire_lat << 48);
> -	}
> -}
> -
>  const char *arch_perf_header_entry(const char *se_header)
>  {
>  	if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 6c3bf74dd78c..c02c435e0f0b 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -2251,7 +2251,7 @@ static void process_event(struct perf_script *script,
>  		fprintf(fp, "%16" PRIu16, sample->ins_lat);
>  
>  	if (PRINT_FIELD(RETIRE_LAT))
> -		fprintf(fp, "%16" PRIu16, sample->retire_lat);
> +		fprintf(fp, "%16" PRIu16, sample->p_stage_cyc_or_retire_lat);
>  
>  	if (PRINT_FIELD(CGROUP)) {
>  		const char *cgrp_name;
> diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
> index ddacef881af2..7e61ddfa66b8 100644
> --- a/tools/perf/util/dlfilter.c
> +++ b/tools/perf/util/dlfilter.c
> @@ -526,7 +526,7 @@ int dlfilter__do_filter_event(struct dlfilter *d,
>  	ASSIGN(period);
>  	ASSIGN(weight);
>  	ASSIGN(ins_lat);
> -	ASSIGN(p_stage_cyc);
> +	d_sample.p_stage_cyc = sample->p_stage_cyc_or_retire_lat;

Can you please move it out of the ASSIGN() area? Maybe right after
d_sample.size  = sizeof(d_sample);.
It looks strange to insert a non-macro assignment here.


>  	ASSIGN(transaction);
>  	ASSIGN(insn_cnt);
>  	ASSIGN(cyc_cnt);
> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
> index 664bf39567ce..119bce37f4fd 100644
> --- a/tools/perf/util/event.h
> +++ b/tools/perf/util/event.h
> @@ -390,8 +390,6 @@ extern unsigned int proc_map_timeout;
>  #define PAGE_SIZE_NAME_LEN	32
>  char *get_page_size_name(u64 size, char *str);
>  
> -void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type);
> -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type);
>  const char *arch_perf_header_entry(const char *se_header);
>  int arch_support_sort_key(const char *sort_key);
>  
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index d55482f094bf..097ab98bb81a 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -2846,11 +2846,18 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size)
>  	return 0;
>  }
>  
> -void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
> -					  const __u64 *array,
> -					  u64 type __maybe_unused)
> +static void perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type)
>  {
> -	data->weight = *array;
> +	union perf_sample_weight weight;
> +
> +	weight.full = *array;
> +	if (type & PERF_SAMPLE_WEIGHT) {
> +		data->weight = weight.full;
> +	} else {
> +		data->weight = weight.var1_dw;
> +		data->ins_lat = weight.var2_w;
> +		data->p_stage_cyc_or_retire_lat = weight.var3_w;
> +	}
>  }
>  

It works for X86, but I'm not sure other ARCHs.
Since the PERF_SAMPLE_WEIGHT_STRUCT is newly added type, should it be
safer to do the below?
	if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
		data->weight = weight.var1_dw;
		data->ins_lat = weight.var2_w;
		data->p_stage_cyc_or_retire_lat = weight.var3_w;
	} else
		data->weight = weight.full;

The default behavior is always data->weight = weight.full; unless an
ARCH apply the new type PERF_SAMPLE_WEIGHT_STRUCT.

>  u64 evsel__bitfield_swap_branch_flags(u64 value)
> @@ -3236,7 +3243,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>  
>  	if (type & PERF_SAMPLE_WEIGHT_TYPE) {
>  		OVERFLOW_CHECK_u64(array);
> -		arch_perf_parse_sample_weight(data, array, type);
> +		perf_parse_sample_weight(data, array, type);
>  		array++;
>  	}
>  
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index afc6855327ab..ae9803dca0b1 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -829,7 +829,7 @@ __hists__add_entry(struct hists *hists,
>  			.period	= sample->period,
>  			.weight1 = sample->weight,
>  			.weight2 = sample->ins_lat,
> -			.weight3 = sample->p_stage_cyc,
> +			.weight3 = sample->p_stage_cyc_or_retire_lat,
>  			.latency = al->latency,
>  		},
>  		.parent = sym_parent,
> @@ -846,7 +846,7 @@ __hists__add_entry(struct hists *hists,
>  		.time = hist_time(sample->time),
>  		.weight = sample->weight,
>  		.ins_lat = sample->ins_lat,
> -		.p_stage_cyc = sample->p_stage_cyc,
> +		.p_stage_cyc_or_retire_lat = sample->p_stage_cyc_or_retire_lat,
>  		.simd_flags = sample->simd_flags,
>  	}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
>  
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index c64254088fc7..67033bdabcf4 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -255,7 +255,7 @@ struct hist_entry {
>  	u64			code_page_size;
>  	u64			weight;
>  	u64			ins_lat;
> -	u64			p_stage_cyc;
> +	u64			p_stage_cyc_or_retire_lat;
>  	s32			socket;
>  	s32			cpu;
>  	int			parallelism;
> diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
> index 4ad4bc118ea5..ec2f3ecf1e1c 100644
> --- a/tools/perf/util/intel-tpebs.c
> +++ b/tools/perf/util/intel-tpebs.c
> @@ -202,8 +202,8 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
>  	 * latency value will be used. Save the number of samples and the sum of
>  	 * retire latency value for each event.
>  	 */
> -	t->last = sample->retire_lat;
> -	update_stats(&t->stats, sample->retire_lat);
> +	t->last = sample->p_stage_cyc_or_retire_lat;
> +	update_stats(&t->stats, sample->p_stage_cyc_or_retire_lat);
>  	mutex_unlock(tpebs_mtx_get());
>  	return 0;
>  }
> diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
> index 0e96240052e9..3330d18fb5fd 100644
> --- a/tools/perf/util/sample.h
> +++ b/tools/perf/util/sample.h
> @@ -104,10 +104,7 @@ struct perf_sample {
>  	u8  cpumode;
>  	u16 misc;
>  	u16 ins_lat;
> -	union {
> -		u16 p_stage_cyc;
> -		u16 retire_lat;
> -	};
> +	u16 p_stage_cyc_or_retire_lat;
>  	bool no_hw_idx;		/* No hw_idx collected in branch_stack */
>  	char insn[MAX_INSN];
>  	void *raw_data;
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index a320672c264e..451bc24ccfba 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -1094,7 +1094,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
>  		printf("... weight: %" PRIu64 "", sample->weight);
>  			if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
>  				printf(",0x%"PRIx16"", sample->ins_lat);
> -				printf(",0x%"PRIx16"", sample->p_stage_cyc);
> +				printf(",0x%"PRIx16"", sample->p_stage_cyc_or_retire_lat);
>  			}
>  		printf("\n");
>  	}
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 45e654653960..dda4ef0b5a73 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -1879,21 +1879,21 @@ struct sort_entry sort_global_ins_lat = {
>  static int64_t
>  sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
>  {
> -	return left->p_stage_cyc - right->p_stage_cyc;
> +	return left->p_stage_cyc_or_retire_lat - right->p_stage_cyc_or_retire_lat;
>  }
>  
>  static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>  					size_t size, unsigned int width)
>  {
>  	return repsep_snprintf(bf, size, "%-*u", width,
> -			he->p_stage_cyc * he->stat.nr_events);
> +			he->p_stage_cyc_or_retire_lat * he->stat.nr_events);
>  }
>  
>  
>  static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
>  					size_t size, unsigned int width)
>  {
> -	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
> +	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc_or_retire_lat);
>  }
>  
>  struct sort_entry sort_local_p_stage_cyc = {
> diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
> index 2fc4d0537840..449a41900fc4 100644
> --- a/tools/perf/util/synthetic-events.c
> +++ b/tools/perf/util/synthetic-events.c
> @@ -1567,10 +1567,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
>  	return result;
>  }
>  
> -void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data,
> +static void perf_synthesize_sample_weight(const struct perf_sample *data,
>  					       __u64 *array, u64 type __maybe_unused)
>  {
>  	*array = data->weight;
> +
> +	if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
> +		*array &= 0xffffffff;
> +		*array |= ((u64)data->ins_lat << 32);
> +		*array |= ((u64)data->p_stage_cyc_or_retire_lat << 48);
> +	}

It works for X86, but powerpc seems have a different behavior. The
p_stage_cyc is missed here. Not sure if it intends.

Thanks,
Kan

>  }
>  
>  static __u64 *copy_read_group_values(__u64 *array, __u64 read_format,
> @@ -1730,7 +1736,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
>  	}
>  
>  	if (type & PERF_SAMPLE_WEIGHT_TYPE) {
> -		arch_perf_synthesize_sample_weight(sample, array, type);
> +		perf_synthesize_sample_weight(sample, array, type);
>  		array++;
>  	}
>
Re: [PATCH v2 1/3] perf sample: Remove arch notion of sample parsing
Posted by Ian Rogers 6 months, 3 weeks ago
On Wed, May 21, 2025 at 8:42 AM Liang, Kan <kan.liang@linux.intel.com> wrote:
>
>
>
> On 2025-05-21 9:54 a.m., Ian Rogers wrote:
> > By definition arch sample parsing and synthesis will inhibit certain
> > kinds of cross-platform record then analysis (report, script,
> > etc.). Remove arch_perf_parse_sample_weight and
> > arch_perf_synthesize_sample_weight replacing with a common
> > implementation. Combine perf_sample p_stage_cyc and retire_lat to
> > capture the differing uses regardless of compiled for architecture.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/arch/powerpc/util/event.c       | 26 ---------------------
> >  tools/perf/arch/x86/tests/sample-parsing.c |  4 ++--
> >  tools/perf/arch/x86/util/event.c           | 27 ----------------------
> >  tools/perf/builtin-script.c                |  2 +-
> >  tools/perf/util/dlfilter.c                 |  2 +-
> >  tools/perf/util/event.h                    |  2 --
> >  tools/perf/util/evsel.c                    | 17 ++++++++++----
> >  tools/perf/util/hist.c                     |  4 ++--
> >  tools/perf/util/hist.h                     |  2 +-
> >  tools/perf/util/intel-tpebs.c              |  4 ++--
> >  tools/perf/util/sample.h                   |  5 +---
> >  tools/perf/util/session.c                  |  2 +-
> >  tools/perf/util/sort.c                     |  6 ++---
> >  tools/perf/util/synthetic-events.c         | 10 ++++++--
> >  14 files changed, 34 insertions(+), 79 deletions(-)
> >
> > diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
> > index 77d8cc2b5691..024ac8b54c33 100644
> > --- a/tools/perf/arch/powerpc/util/event.c
> > +++ b/tools/perf/arch/powerpc/util/event.c
> > @@ -11,32 +11,6 @@
> >  #include "../../../util/debug.h"
> >  #include "../../../util/sample.h"
> >
> > -void arch_perf_parse_sample_weight(struct perf_sample *data,
> > -                                const __u64 *array, u64 type)
> > -{
> > -     union perf_sample_weight weight;
> > -
> > -     weight.full = *array;
> > -     if (type & PERF_SAMPLE_WEIGHT)
> > -             data->weight = weight.full;
> > -     else {
> > -             data->weight = weight.var1_dw;
> > -             data->ins_lat = weight.var2_w;
> > -             data->p_stage_cyc = weight.var3_w;
> > -     }
> > -}
> > -
> > -void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
> > -                                     __u64 *array, u64 type)
> > -{
> > -     *array = data->weight;
> > -
> > -     if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
> > -             *array &= 0xffffffff;
> > -             *array |= ((u64)data->ins_lat << 32);
> > -     }
> > -}
> > -
> >  const char *arch_perf_header_entry(const char *se_header)
> >  {
> >       if (!strcmp(se_header, "Local INSTR Latency"))
> > diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
> > index a061e8619267..95d8f7f1d2fb 100644
> > --- a/tools/perf/arch/x86/tests/sample-parsing.c
> > +++ b/tools/perf/arch/x86/tests/sample-parsing.c
> > @@ -29,7 +29,7 @@ static bool samples_same(const struct perf_sample *s1,
> >  {
> >       if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
> >               COMP(ins_lat);
> > -             COMP(retire_lat);
> > +             COMP(p_stage_cyc_or_retire_lat);
> >       }
> >
> >       return true;
> > @@ -50,7 +50,7 @@ static int do_test(u64 sample_type)
> >       struct perf_sample sample = {
> >               .weight         = 101,
> >               .ins_lat        = 102,
> > -             .retire_lat     = 103,
> > +             .p_stage_cyc_or_retire_lat = 103,
> >       };
> >       struct perf_sample sample_out;
> >       size_t i, sz, bufsz;
> > diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
> > index a0400707180c..576c1c36046c 100644
> > --- a/tools/perf/arch/x86/util/event.c
> > +++ b/tools/perf/arch/x86/util/event.c
> > @@ -92,33 +92,6 @@ int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool,
> >
> >  #endif
> >
> > -void arch_perf_parse_sample_weight(struct perf_sample *data,
> > -                                const __u64 *array, u64 type)
> > -{
> > -     union perf_sample_weight weight;
> > -
> > -     weight.full = *array;
> > -     if (type & PERF_SAMPLE_WEIGHT)
> > -             data->weight = weight.full;
> > -     else {
> > -             data->weight = weight.var1_dw;
> > -             data->ins_lat = weight.var2_w;
> > -             data->retire_lat = weight.var3_w;
> > -     }
> > -}
> > -
> > -void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
> > -                                     __u64 *array, u64 type)
> > -{
> > -     *array = data->weight;
> > -
> > -     if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
> > -             *array &= 0xffffffff;
> > -             *array |= ((u64)data->ins_lat << 32);
> > -             *array |= ((u64)data->retire_lat << 48);
> > -     }
> > -}
> > -
> >  const char *arch_perf_header_entry(const char *se_header)
> >  {
> >       if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
> > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> > index 6c3bf74dd78c..c02c435e0f0b 100644
> > --- a/tools/perf/builtin-script.c
> > +++ b/tools/perf/builtin-script.c
> > @@ -2251,7 +2251,7 @@ static void process_event(struct perf_script *script,
> >               fprintf(fp, "%16" PRIu16, sample->ins_lat);
> >
> >       if (PRINT_FIELD(RETIRE_LAT))
> > -             fprintf(fp, "%16" PRIu16, sample->retire_lat);
> > +             fprintf(fp, "%16" PRIu16, sample->p_stage_cyc_or_retire_lat);
> >
> >       if (PRINT_FIELD(CGROUP)) {
> >               const char *cgrp_name;
> > diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
> > index ddacef881af2..7e61ddfa66b8 100644
> > --- a/tools/perf/util/dlfilter.c
> > +++ b/tools/perf/util/dlfilter.c
> > @@ -526,7 +526,7 @@ int dlfilter__do_filter_event(struct dlfilter *d,
> >       ASSIGN(period);
> >       ASSIGN(weight);
> >       ASSIGN(ins_lat);
> > -     ASSIGN(p_stage_cyc);
> > +     d_sample.p_stage_cyc = sample->p_stage_cyc_or_retire_lat;
>
> Can you please move it out of the ASSIGN() area? Maybe right after
> d_sample.size  = sizeof(d_sample);.
> It looks strange to insert a non-macro assignment here.

Ok. Will fix in v3.

>
> >       ASSIGN(transaction);
> >       ASSIGN(insn_cnt);
> >       ASSIGN(cyc_cnt);
> > diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
> > index 664bf39567ce..119bce37f4fd 100644
> > --- a/tools/perf/util/event.h
> > +++ b/tools/perf/util/event.h
> > @@ -390,8 +390,6 @@ extern unsigned int proc_map_timeout;
> >  #define PAGE_SIZE_NAME_LEN   32
> >  char *get_page_size_name(u64 size, char *str);
> >
> > -void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type);
> > -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type);
> >  const char *arch_perf_header_entry(const char *se_header);
> >  int arch_support_sort_key(const char *sort_key);
> >
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index d55482f094bf..097ab98bb81a 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -2846,11 +2846,18 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size)
> >       return 0;
> >  }
> >
> > -void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
> > -                                       const __u64 *array,
> > -                                       u64 type __maybe_unused)
> > +static void perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type)
> >  {
> > -     data->weight = *array;
> > +     union perf_sample_weight weight;
> > +
> > +     weight.full = *array;
> > +     if (type & PERF_SAMPLE_WEIGHT) {
> > +             data->weight = weight.full;
> > +     } else {
> > +             data->weight = weight.var1_dw;
> > +             data->ins_lat = weight.var2_w;
> > +             data->p_stage_cyc_or_retire_lat = weight.var3_w;
> > +     }
> >  }
> >
>
> It works for X86, but I'm not sure other ARCHs.
> Since the PERF_SAMPLE_WEIGHT_STRUCT is newly added type, should it be
> safer to do the below?
>         if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
>                 data->weight = weight.var1_dw;
>                 data->ins_lat = weight.var2_w;
>                 data->p_stage_cyc_or_retire_lat = weight.var3_w;
>         } else
>                 data->weight = weight.full;
>
> The default behavior is always data->weight = weight.full; unless an
> ARCH apply the new type PERF_SAMPLE_WEIGHT_STRUCT.

Sure. I don't think it will change any of the behavior.

> >  u64 evsel__bitfield_swap_branch_flags(u64 value)
> > @@ -3236,7 +3243,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> >
> >       if (type & PERF_SAMPLE_WEIGHT_TYPE) {
> >               OVERFLOW_CHECK_u64(array);
> > -             arch_perf_parse_sample_weight(data, array, type);
> > +             perf_parse_sample_weight(data, array, type);
> >               array++;
> >       }
> >
> > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> > index afc6855327ab..ae9803dca0b1 100644
> > --- a/tools/perf/util/hist.c
> > +++ b/tools/perf/util/hist.c
> > @@ -829,7 +829,7 @@ __hists__add_entry(struct hists *hists,
> >                       .period = sample->period,
> >                       .weight1 = sample->weight,
> >                       .weight2 = sample->ins_lat,
> > -                     .weight3 = sample->p_stage_cyc,
> > +                     .weight3 = sample->p_stage_cyc_or_retire_lat,
> >                       .latency = al->latency,
> >               },
> >               .parent = sym_parent,
> > @@ -846,7 +846,7 @@ __hists__add_entry(struct hists *hists,
> >               .time = hist_time(sample->time),
> >               .weight = sample->weight,
> >               .ins_lat = sample->ins_lat,
> > -             .p_stage_cyc = sample->p_stage_cyc,
> > +             .p_stage_cyc_or_retire_lat = sample->p_stage_cyc_or_retire_lat,
> >               .simd_flags = sample->simd_flags,
> >       }, *he = hists__findnew_entry(hists, &entry, al, sample_self);
> >
> > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> > index c64254088fc7..67033bdabcf4 100644
> > --- a/tools/perf/util/hist.h
> > +++ b/tools/perf/util/hist.h
> > @@ -255,7 +255,7 @@ struct hist_entry {
> >       u64                     code_page_size;
> >       u64                     weight;
> >       u64                     ins_lat;
> > -     u64                     p_stage_cyc;
> > +     u64                     p_stage_cyc_or_retire_lat;
> >       s32                     socket;
> >       s32                     cpu;
> >       int                     parallelism;
> > diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
> > index 4ad4bc118ea5..ec2f3ecf1e1c 100644
> > --- a/tools/perf/util/intel-tpebs.c
> > +++ b/tools/perf/util/intel-tpebs.c
> > @@ -202,8 +202,8 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
> >        * latency value will be used. Save the number of samples and the sum of
> >        * retire latency value for each event.
> >        */
> > -     t->last = sample->retire_lat;
> > -     update_stats(&t->stats, sample->retire_lat);
> > +     t->last = sample->p_stage_cyc_or_retire_lat;
> > +     update_stats(&t->stats, sample->p_stage_cyc_or_retire_lat);
> >       mutex_unlock(tpebs_mtx_get());
> >       return 0;
> >  }
> > diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
> > index 0e96240052e9..3330d18fb5fd 100644
> > --- a/tools/perf/util/sample.h
> > +++ b/tools/perf/util/sample.h
> > @@ -104,10 +104,7 @@ struct perf_sample {
> >       u8  cpumode;
> >       u16 misc;
> >       u16 ins_lat;
> > -     union {
> > -             u16 p_stage_cyc;
> > -             u16 retire_lat;
> > -     };
> > +     u16 p_stage_cyc_or_retire_lat;
> >       bool no_hw_idx;         /* No hw_idx collected in branch_stack */
> >       char insn[MAX_INSN];
> >       void *raw_data;
> > diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> > index a320672c264e..451bc24ccfba 100644
> > --- a/tools/perf/util/session.c
> > +++ b/tools/perf/util/session.c
> > @@ -1094,7 +1094,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
> >               printf("... weight: %" PRIu64 "", sample->weight);
> >                       if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
> >                               printf(",0x%"PRIx16"", sample->ins_lat);
> > -                             printf(",0x%"PRIx16"", sample->p_stage_cyc);
> > +                             printf(",0x%"PRIx16"", sample->p_stage_cyc_or_retire_lat);
> >                       }
> >               printf("\n");
> >       }
> > diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> > index 45e654653960..dda4ef0b5a73 100644
> > --- a/tools/perf/util/sort.c
> > +++ b/tools/perf/util/sort.c
> > @@ -1879,21 +1879,21 @@ struct sort_entry sort_global_ins_lat = {
> >  static int64_t
> >  sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
> >  {
> > -     return left->p_stage_cyc - right->p_stage_cyc;
> > +     return left->p_stage_cyc_or_retire_lat - right->p_stage_cyc_or_retire_lat;
> >  }
> >
> >  static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> >                                       size_t size, unsigned int width)
> >  {
> >       return repsep_snprintf(bf, size, "%-*u", width,
> > -                     he->p_stage_cyc * he->stat.nr_events);
> > +                     he->p_stage_cyc_or_retire_lat * he->stat.nr_events);
> >  }
> >
> >
> >  static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
> >                                       size_t size, unsigned int width)
> >  {
> > -     return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
> > +     return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc_or_retire_lat);
> >  }
> >
> >  struct sort_entry sort_local_p_stage_cyc = {
> > diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
> > index 2fc4d0537840..449a41900fc4 100644
> > --- a/tools/perf/util/synthetic-events.c
> > +++ b/tools/perf/util/synthetic-events.c
> > @@ -1567,10 +1567,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
> >       return result;
> >  }
> >
> > -void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data,
> > +static void perf_synthesize_sample_weight(const struct perf_sample *data,
> >                                              __u64 *array, u64 type __maybe_unused)
> >  {
> >       *array = data->weight;
> > +
> > +     if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
> > +             *array &= 0xffffffff;
> > +             *array |= ((u64)data->ins_lat << 32);
> > +             *array |= ((u64)data->p_stage_cyc_or_retire_lat << 48);
> > +     }
>
> It works for X86, but powerpc seems have a different behavior. The
> p_stage_cyc is missed here. Not sure if it intends.

Yeah, I was assuming it was a bug on powerpc not to synthesize this
part of the sample as it is read. As evsel__parse_sample will set this
to 0 and the bits don't overlap I expect the change to either be a bug
fix or benign on powerpc.

Thanks,
Ian

> Thanks,
> Kan
>
> >  }
> >
> >  static __u64 *copy_read_group_values(__u64 *array, __u64 read_format,
> > @@ -1730,7 +1736,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
> >       }
> >
> >       if (type & PERF_SAMPLE_WEIGHT_TYPE) {
> > -             arch_perf_synthesize_sample_weight(sample, array, type);
> > +             perf_synthesize_sample_weight(sample, array, type);
> >               array++;
> >       }
> >
>