[RESEND PATCH 09/12] perf: Extend perf_output_read

kan.liang@linux.intel.com posted 12 patches 1 year, 6 months ago
There is a newer version of this series
[RESEND PATCH 09/12] perf: Extend perf_output_read
Posted by kan.liang@linux.intel.com 1 year, 6 months ago
From: Kan Liang <kan.liang@linux.intel.com>

The event may have been updated in the PMU-specific implementation,
e.g., Intel PEBS counters snapshotting. The common code should not
read and overwrite the value.

The PERF_SAMPLE_READ bit in data->sample_flags can be used to detect
whether the PMU-specific value is available. If it is set, skip the
pmu->read() calls in the common code.

Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
 kernel/events/core.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8f908f077935..733e507948e6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7243,7 +7243,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 
 static void perf_output_read_group(struct perf_output_handle *handle,
 			    struct perf_event *event,
-			    u64 enabled, u64 running)
+			    u64 enabled, u64 running, bool read)
 {
 	struct perf_event *leader = event->group_leader, *sub;
 	u64 read_format = event->attr.read_format;
@@ -7265,7 +7265,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = running;
 
-	if ((leader != event) &&
+	if ((leader != event) && read &&
 	    (leader->state == PERF_EVENT_STATE_ACTIVE))
 		leader->pmu->read(leader);
 
@@ -7280,7 +7280,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	for_each_sibling_event(sub, leader) {
 		n = 0;
 
-		if ((sub != event) &&
+		if ((sub != event) && read &&
 		    (sub->state == PERF_EVENT_STATE_ACTIVE))
 			sub->pmu->read(sub);
 
@@ -7307,7 +7307,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
  * on another CPU, from interrupt/NMI context.
  */
 static void perf_output_read(struct perf_output_handle *handle,
-			     struct perf_event *event)
+			     struct perf_event *event,
+			     bool read)
 {
 	u64 enabled = 0, running = 0, now;
 	u64 read_format = event->attr.read_format;
@@ -7325,7 +7326,7 @@ static void perf_output_read(struct perf_output_handle *handle,
 		calc_timer_values(event, &now, &enabled, &running);
 
 	if (event->attr.read_format & PERF_FORMAT_GROUP)
-		perf_output_read_group(handle, event, enabled, running);
+		perf_output_read_group(handle, event, enabled, running, read);
 	else
 		perf_output_read_one(handle, event, enabled, running);
 }
@@ -7367,7 +7368,7 @@ void perf_output_sample(struct perf_output_handle *handle,
 		perf_output_put(handle, data->period);
 
 	if (sample_type & PERF_SAMPLE_READ)
-		perf_output_read(handle, event);
+		perf_output_read(handle, event, !(data->sample_flags & PERF_SAMPLE_READ));
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
@@ -7968,7 +7969,7 @@ perf_event_read_event(struct perf_event *event,
 		return;
 
 	perf_output_put(&handle, read_event);
-	perf_output_read(&handle, event);
+	perf_output_read(&handle, event, true);
 	perf_event__output_id_sample(event, &handle, &sample);
 
 	perf_output_end(&handle);
-- 
2.35.1
Re: [RESEND PATCH 09/12] perf: Extend perf_output_read
Posted by Peter Zijlstra 1 year, 5 months ago
On Tue, Jun 18, 2024 at 08:10:41AM -0700, kan.liang@linux.intel.com wrote:
> From: Kan Liang <kan.liang@linux.intel.com>
> 
> The event may have been updated in the PMU-specific implementation,
> e.g., Intel PEBS counters snapshotting. The common code should not
> read and overwrite the value.
> 
> The PERF_SAMPLE_READ in the data->sample_type can be used to detect
> whether the PMU-specific value is available. If yes, avoid the
> pmu->read() in the common code.
> 
> Reviewed-by: Andi Kleen <ak@linux.intel.com>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> ---
>  kernel/events/core.c | 15 ++++++++-------
>  1 file changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 8f908f077935..733e507948e6 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7243,7 +7243,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
>  
>  static void perf_output_read_group(struct perf_output_handle *handle,
>  			    struct perf_event *event,
> -			    u64 enabled, u64 running)
> +			    u64 enabled, u64 running, bool read)
>  {
>  	struct perf_event *leader = event->group_leader, *sub;
>  	u64 read_format = event->attr.read_format;
> @@ -7265,7 +7265,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
>  	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
>  		values[n++] = running;
>  
> -	if ((leader != event) &&
> +	if ((leader != event) && read &&
>  	    (leader->state == PERF_EVENT_STATE_ACTIVE))
>  		leader->pmu->read(leader);
>  
> @@ -7280,7 +7280,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
>  	for_each_sibling_event(sub, leader) {
>  		n = 0;
>  
> -		if ((sub != event) &&
> +		if ((sub != event) && read &&
>  		    (sub->state == PERF_EVENT_STATE_ACTIVE))
>  			sub->pmu->read(sub);
>  
> @@ -7307,7 +7307,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
>   * on another CPU, from interrupt/NMI context.
>   */
>  static void perf_output_read(struct perf_output_handle *handle,
> -			     struct perf_event *event)
> +			     struct perf_event *event,
> +			     bool read)
>  {
>  	u64 enabled = 0, running = 0, now;
>  	u64 read_format = event->attr.read_format;
> @@ -7325,7 +7326,7 @@ static void perf_output_read(struct perf_output_handle *handle,
>  		calc_timer_values(event, &now, &enabled, &running);
>  
>  	if (event->attr.read_format & PERF_FORMAT_GROUP)
> -		perf_output_read_group(handle, event, enabled, running);
> +		perf_output_read_group(handle, event, enabled, running, read);
>  	else
>  		perf_output_read_one(handle, event, enabled, running);
>  }
> @@ -7367,7 +7368,7 @@ void perf_output_sample(struct perf_output_handle *handle,
>  		perf_output_put(handle, data->period);
>  
>  	if (sample_type & PERF_SAMPLE_READ)
> -		perf_output_read(handle, event);
> +		perf_output_read(handle, event, !(data->sample_flags & PERF_SAMPLE_READ));
>  
>  	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
>  		int size = 1;

This can't be right. The output is order-sensitive: if
PERF_SAMPLE_READ is part of the PERF_RECORD_SAMPLE, it must be emitted
at this location.
Re: [RESEND PATCH 09/12] perf: Extend perf_output_read
Posted by Peter Zijlstra 1 year, 5 months ago
On Thu, Jun 20, 2024 at 11:00:28AM +0200, Peter Zijlstra wrote:
> On Tue, Jun 18, 2024 at 08:10:41AM -0700, kan.liang@linux.intel.com wrote:
> > From: Kan Liang <kan.liang@linux.intel.com>
> > 
> > The event may have been updated in the PMU-specific implementation,
> > e.g., Intel PEBS counters snapshotting. The common code should not
> > read and overwrite the value.
> > 
> > The PERF_SAMPLE_READ in the data->sample_type can be used to detect
> > whether the PMU-specific value is available. If yes, avoid the
> > pmu->read() in the common code.
> > 
> > Reviewed-by: Andi Kleen <ak@linux.intel.com>
> > Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> > ---
> >  kernel/events/core.c | 15 ++++++++-------
> >  1 file changed, 8 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index 8f908f077935..733e507948e6 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -7243,7 +7243,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
> >  
> >  static void perf_output_read_group(struct perf_output_handle *handle,
> >  			    struct perf_event *event,
> > -			    u64 enabled, u64 running)
> > +			    u64 enabled, u64 running, bool read)
> >  {
> >  	struct perf_event *leader = event->group_leader, *sub;
> >  	u64 read_format = event->attr.read_format;
> > @@ -7265,7 +7265,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
> >  	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
> >  		values[n++] = running;
> >  
> > -	if ((leader != event) &&
> > +	if ((leader != event) && read &&
> >  	    (leader->state == PERF_EVENT_STATE_ACTIVE))
> >  		leader->pmu->read(leader);
> >  
> > @@ -7280,7 +7280,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
> >  	for_each_sibling_event(sub, leader) {
> >  		n = 0;
> >  
> > -		if ((sub != event) &&
> > +		if ((sub != event) && read &&
> >  		    (sub->state == PERF_EVENT_STATE_ACTIVE))
> >  			sub->pmu->read(sub);
> >  
> > @@ -7307,7 +7307,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
> >   * on another CPU, from interrupt/NMI context.
> >   */
> >  static void perf_output_read(struct perf_output_handle *handle,
> > -			     struct perf_event *event)
> > +			     struct perf_event *event,
> > +			     bool read)
> >  {
> >  	u64 enabled = 0, running = 0, now;
> >  	u64 read_format = event->attr.read_format;
> > @@ -7325,7 +7326,7 @@ static void perf_output_read(struct perf_output_handle *handle,
> >  		calc_timer_values(event, &now, &enabled, &running);
> >  
> >  	if (event->attr.read_format & PERF_FORMAT_GROUP)
> > -		perf_output_read_group(handle, event, enabled, running);
> > +		perf_output_read_group(handle, event, enabled, running, read);
> >  	else
> >  		perf_output_read_one(handle, event, enabled, running);
> >  }
> > @@ -7367,7 +7368,7 @@ void perf_output_sample(struct perf_output_handle *handle,
> >  		perf_output_put(handle, data->period);
> >  
> >  	if (sample_type & PERF_SAMPLE_READ)
> > -		perf_output_read(handle, event);
> > +		perf_output_read(handle, event, !(data->sample_flags & PERF_SAMPLE_READ));
> >  
> >  	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
> >  		int size = 1;
> 
> this can't be right. The output is order sensitive. If a
> PERF_SAMPLE_READ is part of the PERF_RECORD_SAMPLE, it must be in this
> location.

Oh, never mind. I read that wrong. I'll try again after a break.