[PATCH 2/7] perf: Add PERF_EV_CAP_READ_SCOPE

kan.liang@linux.intel.com posted 7 patches 1 year, 4 months ago
[PATCH 2/7] perf: Add PERF_EV_CAP_READ_SCOPE
Posted by kan.liang@linux.intel.com 1 year, 4 months ago
From: Kan Liang <kan.liang@linux.intel.com>

Usually, an event can be read from any CPU of the scope. It doesn't need
to be read from the advertised CPU.

Add a new event cap, PERF_EV_CAP_READ_SCOPE. An event of a PMU with
scope can be read from any active CPU in the scope.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
 include/linux/perf_event.h |  3 +++
 kernel/events/core.c       | 14 +++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1102d5c2be70..1206bc86eb4f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -633,10 +633,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
  * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
  * cannot be a group leader. If an event with this flag is detached from the
  * group it is scheduled out and moved into an unrecoverable ERROR state.
+ * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
+ * PMU scope where it is active.
  */
 #define PERF_EV_CAP_SOFTWARE		BIT(0)
 #define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
 #define PERF_EV_CAP_SIBLING		BIT(2)
+#define PERF_EV_CAP_READ_SCOPE		BIT(3)
 
 #define SWEVENT_HLIST_BITS		8
 #define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5e1877c4cb4c..c55294f34575 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4463,16 +4463,24 @@ struct perf_read_data {
 	int ret;
 };
 
+static inline const struct cpumask *perf_scope_cpu_topology_cpumask(unsigned int scope, int cpu);
+
 static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
 {
+	int local_cpu = smp_processor_id();
 	u16 local_pkg, event_pkg;
 
 	if ((unsigned)event_cpu >= nr_cpu_ids)
 		return event_cpu;
 
-	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
-		int local_cpu = smp_processor_id();
+	if (event->group_caps & PERF_EV_CAP_READ_SCOPE) {
+		const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(event->pmu->scope, event_cpu);
+
+		if (cpumask && cpumask_test_cpu(local_cpu, cpumask))
+			return local_cpu;
+	}
 
+	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
 		event_pkg = topology_physical_package_id(event_cpu);
 		local_pkg = topology_physical_package_id(local_cpu);
 
@@ -11804,7 +11812,7 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 				if (cpu >= nr_cpu_ids)
 					ret = -ENODEV;
 				else
-					event->cpu = cpu;
+					event->event_caps |= PERF_EV_CAP_READ_SCOPE;
 			} else {
 				ret = -ENODEV;
 			}
-- 
2.38.1
Re: [PATCH 2/7] perf: Add PERF_EV_CAP_READ_SCOPE
Posted by Peter Zijlstra 1 year, 3 months ago
On Fri, Aug 02, 2024 at 08:16:38AM -0700, kan.liang@linux.intel.com wrote:
> From: Kan Liang <kan.liang@linux.intel.com>
> 
> Usually, an event can be read from any CPU of the scope. It doesn't need
> to be read from the advertised CPU.
> 
> Add a new event cap, PERF_EV_CAP_READ_SCOPE. An event of a PMU with
> scope can be read from any active CPU in the scope.
> 
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> ---
>  include/linux/perf_event.h |  3 +++
>  kernel/events/core.c       | 14 +++++++++++---
>  2 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 1102d5c2be70..1206bc86eb4f 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -633,10 +633,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
>   * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
>   * cannot be a group leader. If an event with this flag is detached from the
>   * group it is scheduled out and moved into an unrecoverable ERROR state.
> + * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
> + * PMU scope where it is active.
>   */
>  #define PERF_EV_CAP_SOFTWARE		BIT(0)
>  #define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
>  #define PERF_EV_CAP_SIBLING		BIT(2)
> +#define PERF_EV_CAP_READ_SCOPE		BIT(3)
>  
>  #define SWEVENT_HLIST_BITS		8
>  #define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 5e1877c4cb4c..c55294f34575 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4463,16 +4463,24 @@ struct perf_read_data {
>  	int ret;
>  };
>  
> +static inline const struct cpumask *perf_scope_cpu_topology_cpumask(unsigned int scope, int cpu);
> +
>  static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
>  {
> +	int local_cpu = smp_processor_id();
>  	u16 local_pkg, event_pkg;
>  
>  	if ((unsigned)event_cpu >= nr_cpu_ids)
>  		return event_cpu;
>  
> -	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
> -		int local_cpu = smp_processor_id();
> +	if (event->group_caps & PERF_EV_CAP_READ_SCOPE) {
> +		const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(event->pmu->scope, event_cpu);
> +
> +		if (cpumask && cpumask_test_cpu(local_cpu, cpumask))
> +			return local_cpu;
> +	}
>  
> +	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {

I'm guessing the goal is to eventually remove this one, right?
Re: [PATCH 2/7] perf: Add PERF_EV_CAP_READ_SCOPE
Posted by Liang, Kan 1 year, 3 months ago

On 2024-09-06 11:11 a.m., Peter Zijlstra wrote:
> On Fri, Aug 02, 2024 at 08:16:38AM -0700, kan.liang@linux.intel.com wrote:
>> From: Kan Liang <kan.liang@linux.intel.com>
>>
>> Usually, an event can be read from any CPU of the scope. It doesn't need
>> to be read from the advertised CPU.
>>
>> Add a new event cap, PERF_EV_CAP_READ_SCOPE. An event of a PMU with
>> scope can be read from any active CPU in the scope.
>>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> ---
>>  include/linux/perf_event.h |  3 +++
>>  kernel/events/core.c       | 14 +++++++++++---
>>  2 files changed, 14 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index 1102d5c2be70..1206bc86eb4f 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -633,10 +633,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
>>   * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
>>   * cannot be a group leader. If an event with this flag is detached from the
>>   * group it is scheduled out and moved into an unrecoverable ERROR state.
>> + * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
>> + * PMU scope where it is active.
>>   */
>>  #define PERF_EV_CAP_SOFTWARE		BIT(0)
>>  #define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
>>  #define PERF_EV_CAP_SIBLING		BIT(2)
>> +#define PERF_EV_CAP_READ_SCOPE		BIT(3)
>>  
>>  #define SWEVENT_HLIST_BITS		8
>>  #define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index 5e1877c4cb4c..c55294f34575 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -4463,16 +4463,24 @@ struct perf_read_data {
>>  	int ret;
>>  };
>>  
>> +static inline const struct cpumask *perf_scope_cpu_topology_cpumask(unsigned int scope, int cpu);
>> +
>>  static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
>>  {
>> +	int local_cpu = smp_processor_id();
>>  	u16 local_pkg, event_pkg;
>>  
>>  	if ((unsigned)event_cpu >= nr_cpu_ids)
>>  		return event_cpu;
>>  
>> -	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
>> -		int local_cpu = smp_processor_id();
>> +	if (event->group_caps & PERF_EV_CAP_READ_SCOPE) {
>> +		const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(event->pmu->scope, event_cpu);
>> +
>> +		if (cpumask && cpumask_test_cpu(local_cpu, cpumask))
>> +			return local_cpu;
>> +	}
>>  
>> +	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
> 
> I'm guessing the goal is to eventually remove this one, right?

Yes. Once I have the uncore cleaned up, it should be possible to remove
PERF_EV_CAP_READ_ACTIVE_PKG.

Thanks,
Kan
>
[tip: perf/core] perf: Add PERF_EV_CAP_READ_SCOPE
Posted by tip-bot2 for Kan Liang 1 year, 3 months ago
The following commit has been merged into the perf/core branch of tip:

Commit-ID:     a48a36b316ae5d3ab83f9b545dba15998e96d59c
Gitweb:        https://git.kernel.org/tip/a48a36b316ae5d3ab83f9b545dba15998e96d59c
Author:        Kan Liang <kan.liang@linux.intel.com>
AuthorDate:    Fri, 02 Aug 2024 08:16:38 -07:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 10 Sep 2024 11:44:13 +02:00

perf: Add PERF_EV_CAP_READ_SCOPE

Usually, an event can be read from any CPU of the scope. It doesn't need
to be read from the advertised CPU.

Add a new event cap, PERF_EV_CAP_READ_SCOPE. An event of a PMU with
scope can be read from any active CPU in the scope.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240802151643.1691631-3-kan.liang@linux.intel.com
---
 include/linux/perf_event.h |  3 +++
 kernel/events/core.c       | 14 +++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a3cbcd7..794f660 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -636,10 +636,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
  * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
  * cannot be a group leader. If an event with this flag is detached from the
  * group it is scheduled out and moved into an unrecoverable ERROR state.
+ * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
+ * PMU scope where it is active.
  */
 #define PERF_EV_CAP_SOFTWARE		BIT(0)
 #define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
 #define PERF_EV_CAP_SIBLING		BIT(2)
+#define PERF_EV_CAP_READ_SCOPE		BIT(3)
 
 #define SWEVENT_HLIST_BITS		8
 #define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5ff9735..2766090 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4556,16 +4556,24 @@ struct perf_read_data {
 	int ret;
 };
 
+static inline const struct cpumask *perf_scope_cpu_topology_cpumask(unsigned int scope, int cpu);
+
 static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
 {
+	int local_cpu = smp_processor_id();
 	u16 local_pkg, event_pkg;
 
 	if ((unsigned)event_cpu >= nr_cpu_ids)
 		return event_cpu;
 
-	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
-		int local_cpu = smp_processor_id();
+	if (event->group_caps & PERF_EV_CAP_READ_SCOPE) {
+		const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(event->pmu->scope, event_cpu);
+
+		if (cpumask && cpumask_test_cpu(local_cpu, cpumask))
+			return local_cpu;
+	}
 
+	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
 		event_pkg = topology_physical_package_id(event_cpu);
 		local_pkg = topology_physical_package_id(local_cpu);
 
@@ -11905,7 +11913,7 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 				if (cpu >= nr_cpu_ids)
 					ret = -ENODEV;
 				else
-					event->cpu = cpu;
+					event->event_caps |= PERF_EV_CAP_READ_SCOPE;
 			} else {
 				ret = -ENODEV;
 			}