[PATCH V2 4/6] perf: Use sample_flags for weight

kan.liang@linux.intel.com posted 6 patches 3 years, 7 months ago
[PATCH V2 4/6] perf: Use sample_flags for weight
Posted by kan.liang@linux.intel.com 3 years, 7 months ago
From: Kan Liang <kan.liang@linux.intel.com>

Use the new sample_flags to indicate whether the weight field is filled
by the PMU driver.

Remove the weight field from the perf_sample_data_init() to minimize the
number of cache lines touched.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
 arch/powerpc/perf/core-book3s.c |  5 +++--
 arch/x86/events/intel/ds.c      | 10 +++++++---
 include/linux/perf_event.h      |  3 +--
 kernel/events/core.c            |  3 +++
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 1ad1efdb33f9..a5c95a2006ea 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2305,9 +2305,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
 
 		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
-						ppmu->get_mem_weight)
+						ppmu->get_mem_weight) {
 			ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
-
+			data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+		}
 		if (perf_event_overflow(event, &data, regs))
 			power_pmu_stop(event, 0);
 	} else if (period) {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 5dcfd2de6ef8..f66a4905cc87 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1535,8 +1535,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 	/*
 	 * Use latency for weight (only avail with PEBS-LL)
 	 */
-	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
+	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
 		data->weight.full = pebs->lat;
+		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+	}
 
 	/*
 	 * data.data_src encodes the data source
@@ -1628,9 +1630,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 
 	if (x86_pmu.intel_cap.pebs_format >= 2) {
 		/* Only set the TSX weight when no memory weight. */
-		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
+		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
 			data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
-
+			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+		}
 		if (sample_type & PERF_SAMPLE_TRANSACTION)
 			data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
 							      pebs->ax);
@@ -1772,6 +1775,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 				data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
 					intel_get_tsx_weight(meminfo->tsx_tuning);
 			}
+			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
 		}
 
 		if (sample_type & PERF_SAMPLE_DATA_SRC)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1e12e79454e0..06a587b5faa9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1012,7 +1012,6 @@ struct perf_sample_data {
 	u64				addr;
 	struct perf_raw_record		*raw;
 	u64				period;
-	union perf_sample_weight	weight;
 	u64				txn;
 	union  perf_mem_data_src	data_src;
 
@@ -1021,6 +1020,7 @@ struct perf_sample_data {
 	 * perf_{prepare,output}_sample().
 	 */
 	struct perf_branch_stack	*br_stack;
+	union perf_sample_weight	weight;
 
 	u64				type;
 	u64				ip;
@@ -1063,7 +1063,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->addr = addr;
 	data->raw  = NULL;
 	data->period = period;
-	data->weight.full = 0;
 	data->data_src.val = PERF_MEM_NA;
 	data->txn = 0;
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 104c0c9f4e6f..f0af45db02b3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7408,6 +7408,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 		header->size += size;
 	}
 
+	if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
+		data->weight.full = 0;
+
 	if (sample_type & PERF_SAMPLE_REGS_INTR) {
 		/* regs dump ABI info */
 		int size = sizeof(u64);
-- 
2.35.1
Re: [PATCH V2 4/6] perf: Use sample_flags for weight
Posted by Namhyung Kim 3 years, 7 months ago
On Thu, Sep 1, 2022 at 6:10 AM <kan.liang@linux.intel.com> wrote:
>
> From: Kan Liang <kan.liang@linux.intel.com>
>
> Use the new sample_flags to indicate whether the weight field is filled
> by the PMU driver.
>
> Remove the weight field from the perf_sample_data_init() to minimize the
> number of cache lines touched.
>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> ---
>  arch/powerpc/perf/core-book3s.c |  5 +++--
>  arch/x86/events/intel/ds.c      | 10 +++++++---
>  include/linux/perf_event.h      |  3 +--
>  kernel/events/core.c            |  3 +++
>  4 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 1ad1efdb33f9..a5c95a2006ea 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -2305,9 +2305,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
>                         ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
>
>                 if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
> -                                               ppmu->get_mem_weight)
> +                                               ppmu->get_mem_weight) {
>                         ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
> -
> +                       data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
> +               }
>                 if (perf_event_overflow(event, &data, regs))
>                         power_pmu_stop(event, 0);
>         } else if (period) {
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index 5dcfd2de6ef8..f66a4905cc87 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -1535,8 +1535,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
>         /*
>          * Use latency for weight (only avail with PEBS-LL)
>          */
> -       if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
> +       if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
>                 data->weight.full = pebs->lat;
> +               data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
> +       }
>
>         /*
>          * data.data_src encodes the data source
> @@ -1628,9 +1630,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
>
>         if (x86_pmu.intel_cap.pebs_format >= 2) {
>                 /* Only set the TSX weight when no memory weight. */
> -               if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
> +               if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
>                         data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
> -
> +                       data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
> +               }
>                 if (sample_type & PERF_SAMPLE_TRANSACTION)
>                         data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
>                                                               pebs->ax);
> @@ -1772,6 +1775,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
>                                 data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
>                                         intel_get_tsx_weight(meminfo->tsx_tuning);
>                         }
> +                       data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;

I was thinking about splitting PERF_SAMPLE_WEIGHT and
PERF_SAMPLE_WEIGHT_STRUCT but it'd just add complexity
unnecessarily?

Thanks,
Namhyung

>                 }
>
>                 if (sample_type & PERF_SAMPLE_DATA_SRC)
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 1e12e79454e0..06a587b5faa9 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1012,7 +1012,6 @@ struct perf_sample_data {
>         u64                             addr;
>         struct perf_raw_record          *raw;
>         u64                             period;
> -       union perf_sample_weight        weight;
>         u64                             txn;
>         union  perf_mem_data_src        data_src;
>
> @@ -1021,6 +1020,7 @@ struct perf_sample_data {
>          * perf_{prepare,output}_sample().
>          */
>         struct perf_branch_stack        *br_stack;
> +       union perf_sample_weight        weight;
>
>         u64                             type;
>         u64                             ip;
> @@ -1063,7 +1063,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>         data->addr = addr;
>         data->raw  = NULL;
>         data->period = period;
> -       data->weight.full = 0;
>         data->data_src.val = PERF_MEM_NA;
>         data->txn = 0;
>  }
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 104c0c9f4e6f..f0af45db02b3 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7408,6 +7408,9 @@ void perf_prepare_sample(struct perf_event_header *header,
>                 header->size += size;
>         }
>
> +       if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
> +               data->weight.full = 0;
> +
>         if (sample_type & PERF_SAMPLE_REGS_INTR) {
>                 /* regs dump ABI info */
>                 int size = sizeof(u64);
> --
> 2.35.1
>
Re: [PATCH V2 4/6] perf: Use sample_flags for weight
Posted by Liang, Kan 3 years, 7 months ago

On 2022-09-02 1:29 a.m., Namhyung Kim wrote:
> On Thu, Sep 1, 2022 at 6:10 AM <kan.liang@linux.intel.com> wrote:
>>
>> From: Kan Liang <kan.liang@linux.intel.com>
>>
>> Use the new sample_flags to indicate whether the weight field is filled
>> by the PMU driver.
>>
>> Remove the weight field from the perf_sample_data_init() to minimize the
>> number of cache lines touched.
>>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> ---
>>  arch/powerpc/perf/core-book3s.c |  5 +++--
>>  arch/x86/events/intel/ds.c      | 10 +++++++---
>>  include/linux/perf_event.h      |  3 +--
>>  kernel/events/core.c            |  3 +++
>>  4 files changed, 14 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>> index 1ad1efdb33f9..a5c95a2006ea 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -2305,9 +2305,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
>>                         ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
>>
>>                 if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
>> -                                               ppmu->get_mem_weight)
>> +                                               ppmu->get_mem_weight) {
>>                         ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
>> -
>> +                       data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
>> +               }
>>                 if (perf_event_overflow(event, &data, regs))
>>                         power_pmu_stop(event, 0);
>>         } else if (period) {
>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>> index 5dcfd2de6ef8..f66a4905cc87 100644
>> --- a/arch/x86/events/intel/ds.c
>> +++ b/arch/x86/events/intel/ds.c
>> @@ -1535,8 +1535,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
>>         /*
>>          * Use latency for weight (only avail with PEBS-LL)
>>          */
>> -       if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
>> +       if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
>>                 data->weight.full = pebs->lat;
>> +               data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
>> +       }
>>
>>         /*
>>          * data.data_src encodes the data source
>> @@ -1628,9 +1630,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
>>
>>         if (x86_pmu.intel_cap.pebs_format >= 2) {
>>                 /* Only set the TSX weight when no memory weight. */
>> -               if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
>> +               if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
>>                         data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
>> -
>> +                       data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
>> +               }
>>                 if (sample_type & PERF_SAMPLE_TRANSACTION)
>>                         data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
>>                                                               pebs->ax);
>> @@ -1772,6 +1775,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
>>                                 data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
>>                                         intel_get_tsx_weight(meminfo->tsx_tuning);
>>                         }
>> +                       data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
> 
> I was thinking about splitting PERF_SAMPLE_WEIGHT and
> PERF_SAMPLE_WEIGHT_STRUCT but it'd just add complexity
> unnecessarily?

Right, I don't think it's necessary. The code path is to handle the
weight. We use the same way to handle the old and new weight flags. We
should have both set.

Thanks,
Kan
> 
> Thanks,
> Namhyung
> 
>>                 }
>>
>>                 if (sample_type & PERF_SAMPLE_DATA_SRC)
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index 1e12e79454e0..06a587b5faa9 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -1012,7 +1012,6 @@ struct perf_sample_data {
>>         u64                             addr;
>>         struct perf_raw_record          *raw;
>>         u64                             period;
>> -       union perf_sample_weight        weight;
>>         u64                             txn;
>>         union  perf_mem_data_src        data_src;
>>
>> @@ -1021,6 +1020,7 @@ struct perf_sample_data {
>>          * perf_{prepare,output}_sample().
>>          */
>>         struct perf_branch_stack        *br_stack;
>> +       union perf_sample_weight        weight;
>>
>>         u64                             type;
>>         u64                             ip;
>> @@ -1063,7 +1063,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>>         data->addr = addr;
>>         data->raw  = NULL;
>>         data->period = period;
>> -       data->weight.full = 0;
>>         data->data_src.val = PERF_MEM_NA;
>>         data->txn = 0;
>>  }
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index 104c0c9f4e6f..f0af45db02b3 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -7408,6 +7408,9 @@ void perf_prepare_sample(struct perf_event_header *header,
>>                 header->size += size;
>>         }
>>
>> +       if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
>> +               data->weight.full = 0;
>> +
>>         if (sample_type & PERF_SAMPLE_REGS_INTR) {
>>                 /* regs dump ABI info */
>>                 int size = sizeof(u64);
>> --
>> 2.35.1
>>
[tip: perf/core] perf: Use sample_flags for weight
Posted by tip-bot2 for Kan Liang 3 years, 7 months ago
The following commit has been merged into the perf/core branch of tip:

Commit-ID:     2abe681da0a192ab850a5271d838a7817b469fca
Gitweb:        https://git.kernel.org/tip/2abe681da0a192ab850a5271d838a7817b469fca
Author:        Kan Liang <kan.liang@linux.intel.com>
AuthorDate:    Thu, 01 Sep 2022 06:09:57 -07:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 06 Sep 2022 11:33:02 +02:00

perf: Use sample_flags for weight

Use the new sample_flags to indicate whether the weight field is filled
by the PMU driver.

Remove the weight field from the perf_sample_data_init() to minimize the
number of cache lines touched.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220901130959.1285717-5-kan.liang@linux.intel.com
---
 arch/powerpc/perf/core-book3s.c |  5 +++--
 arch/x86/events/intel/ds.c      | 10 +++++++---
 include/linux/perf_event.h      |  3 +--
 kernel/events/core.c            |  3 +++
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 1ad1efd..a5c95a2 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2305,9 +2305,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
 
 		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
-						ppmu->get_mem_weight)
+						ppmu->get_mem_weight) {
 			ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
-
+			data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+		}
 		if (perf_event_overflow(event, &data, regs))
 			power_pmu_stop(event, 0);
 	} else if (period) {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 0489f75..4c51118 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1527,8 +1527,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 	/*
 	 * Use latency for weight (only avail with PEBS-LL)
 	 */
-	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
+	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
 		data->weight.full = pebs->lat;
+		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+	}
 
 	/*
 	 * data.data_src encodes the data source
@@ -1620,9 +1622,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 
 	if (x86_pmu.intel_cap.pebs_format >= 2) {
 		/* Only set the TSX weight when no memory weight. */
-		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
+		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
 			data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
-
+			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+		}
 		if (sample_type & PERF_SAMPLE_TRANSACTION)
 			data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
 							      pebs->ax);
@@ -1764,6 +1767,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 				data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
 					intel_get_tsx_weight(meminfo->tsx_tuning);
 			}
+			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
 		}
 
 		if (sample_type & PERF_SAMPLE_DATA_SRC)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1e12e79..06a587b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1012,7 +1012,6 @@ struct perf_sample_data {
 	u64				addr;
 	struct perf_raw_record		*raw;
 	u64				period;
-	union perf_sample_weight	weight;
 	u64				txn;
 	union  perf_mem_data_src	data_src;
 
@@ -1021,6 +1020,7 @@ struct perf_sample_data {
 	 * perf_{prepare,output}_sample().
 	 */
 	struct perf_branch_stack	*br_stack;
+	union perf_sample_weight	weight;
 
 	u64				type;
 	u64				ip;
@@ -1063,7 +1063,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->addr = addr;
 	data->raw  = NULL;
 	data->period = period;
-	data->weight.full = 0;
 	data->data_src.val = PERF_MEM_NA;
 	data->txn = 0;
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 104c0c9..f0af45d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7408,6 +7408,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 		header->size += size;
 	}
 
+	if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
+		data->weight.full = 0;
+
 	if (sample_type & PERF_SAMPLE_REGS_INTR) {
 		/* regs dump ABI info */
 		int size = sizeof(u64);