From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V2 01/13] perf/x86/intel: Support the PEBS event mask
Date: Tue, 25 Jun 2024 11:22:44 -0700
Message-Id: <20240625182256.291914-2-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

The current perf code assumes that the counters which support PEBS are
contiguous. That is no longer guaranteed once the counters are enumerated
via the counter mask of the new CPUID leaf 0x23: there may be holes in
the mask on future platforms or in a virtualization environment.

Store the PEBS event mask rather than the maximum number of PEBS
counters in the x86 PMU structures.
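[Editorial illustration, not part of the patch] The distinction between a
PEBS counter *count* and a PEBS counter *mask* is easiest to see in
isolation. The sketch below re-creates the two helpers the diff adds,
intel_pmu_pebs_mask() and intel_pmu_max_num_pebs(), as standalone
user-space C; GENMASK_ULL() and the last-bit search are replaced with
plain-C stand-ins, and the example counter mask with a hole is
hypothetical.

#include <stdio.h>
#include <stdint.h>

/* Local stand-ins for the kernel macros the patch relies on. */
#define GENMASK_ULL(h, l)	(((~0ULL) >> (63 - (h))) & ~((1ULL << (l)) - 1))
#define MAX_PEBS_EVENTS		32
#define MAX_PEBS_EVENTS_MASK	GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)

/* Mirrors intel_pmu_pebs_mask(): clamp a counter mask to PEBS-capable bits. */
static uint64_t pebs_mask(uint64_t cntr_mask)
{
	return MAX_PEBS_EVENTS_MASK & cntr_mask;
}

/* Mirrors intel_pmu_max_num_pebs(): highest set bit + 1, like find_last_bit(). */
static int max_num_pebs(uint64_t pebs_events_mask)
{
	return pebs_events_mask ? 64 - __builtin_clzll(pebs_events_mask) : 0;
}

int main(void)
{
	/* Hypothetical enumeration with a hole: counters 0-2 and 4-5 exist. */
	uint64_t cntr_mask = 0x37;

	printf("pebs mask:    %#llx\n", (unsigned long long)pebs_mask(cntr_mask));
	printf("max PEBS idx: %d\n", max_num_pebs(pebs_mask(cntr_mask)));
	/* With a hole, the number of usable counters (5 set bits) no longer
	 * equals the highest index + 1 (6), which is why a stored count
	 * stops being sufficient and a mask must be kept instead. */
	return 0;
}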
Reviewed-by: Andi Kleen
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 arch/x86/events/intel/core.c    |  8 ++++----
 arch/x86/events/intel/ds.c      | 15 ++++++++-------
 arch/x86/events/perf_event.h    | 14 ++++++++++++--
 arch/x86/include/asm/intel_ds.h |  1 +
 4 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 38c1b1f1deaa..c27a9f75defb 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4728,7 +4728,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
 {
 	intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
 				     &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
-	pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+	pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
 	pmu->unconstrained = (struct event_constraint)
 			     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
 						0, pmu->num_counters, 0, 0);
@@ -6070,7 +6070,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
 
 		pmu->num_counters = x86_pmu.num_counters;
 		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
-		pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
 		pmu->unconstrained = (struct event_constraint)
 				     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
 							0, pmu->num_counters, 0, 0);
@@ -6193,7 +6193,7 @@ __init int intel_pmu_init(void)
 	x86_pmu.events_maskl		= ebx.full;
 	x86_pmu.events_mask_len		= eax.split.mask_length;
 
-	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+	x86_pmu.pebs_events_mask	= intel_pmu_pebs_mask(GENMASK_ULL(x86_pmu.num_counters - 1, 0));
 	x86_pmu.pebs_capable		= PEBS_COUNTER_MASK;
 
 	/*
@@ -6822,7 +6822,7 @@ __init int intel_pmu_init(void)
 			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
 		}
 
-		pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
 		pmu->unconstrained = (struct event_constraint)
 				     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
 							0, pmu->num_counters, 0, 0);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index e010bfed8417..f6105b8dcf87 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1137,7 +1137,7 @@ void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sche
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
-	int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
+	int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
 	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
 	u64 threshold;
 	int reserved;
@@ -2157,6 +2157,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 	void *base, *at, *top;
 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
 	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+	int max_pebs_events = intel_pmu_max_num_pebs(NULL);
 	int bit, i, size;
 	u64 mask;
 
@@ -2168,8 +2169,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	mask = (1ULL << x86_pmu.max_pebs_events) - 1;
-	size = x86_pmu.max_pebs_events;
+	mask = x86_pmu.pebs_events_mask;
+	size = max_pebs_events;
 	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
 		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
 		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
@@ -2208,8 +2209,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 		pebs_status = p->status = cpuc->pebs_enabled;
 
 		bit = find_first_bit((unsigned long *)&pebs_status,
-				     x86_pmu.max_pebs_events);
-		if (bit >= x86_pmu.max_pebs_events)
+				     max_pebs_events);
+
+		if (!(x86_pmu.pebs_events_mask & (1 << bit)))
 			continue;
 
 		/*
@@ -2267,7 +2269,6 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 {
 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
 	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event;
@@ -2283,7 +2284,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	mask = ((1ULL << max_pebs_events) - 1) |
+	mask = hybrid(cpuc->pmu, pebs_events_mask) |
 	       (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
 	size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 72b022a1e16c..0e411539f88a 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -684,7 +684,7 @@ struct x86_hybrid_pmu {
 	cpumask_t			supported_cpus;
 	union perf_capabilities		intel_cap;
 	u64				intel_ctrl;
-	int				max_pebs_events;
+	u64				pebs_events_mask;
 	int				num_counters;
 	int				num_counters_fixed;
 	struct event_constraint		unconstrained;
@@ -852,7 +852,7 @@ struct x86_pmu {
 			pebs_ept		:1;
 	int		pebs_record_size;
 	int		pebs_buffer_size;
-	int		max_pebs_events;
+	u64		pebs_events_mask;
 	void		(*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
@@ -1661,6 +1661,16 @@ static inline int is_ht_workaround_enabled(void)
 	return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
 }
 
+static inline u64 intel_pmu_pebs_mask(u64 cntr_mask)
+{
+	return MAX_PEBS_EVENTS_MASK & cntr_mask;
+}
+
+static inline int intel_pmu_max_num_pebs(struct pmu *pmu)
+{
+	return find_last_bit((unsigned long *)&hybrid(pmu, pebs_events_mask), MAX_PEBS_EVENTS) + 1;
+}
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 2f9eeb5c3069..5dbeac48a5b9 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -9,6 +9,7 @@
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS_FMT4	8
 #define MAX_PEBS_EVENTS		32
+#define MAX_PEBS_EVENTS_MASK	GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
 #define MAX_FIXED_PEBS_EVENTS	16
 
 /*
-- 
2.35.1
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang,
	Sandipan Das, Ravi Bangoria, silviazhao, CodyYao-oc
Subject: [PATCH V2 02/13] perf/x86: Support counter mask
Date: Tue, 25 Jun 2024 11:22:45 -0700
Message-Id: <20240625182256.291914-3-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

The current perf code assumes that both the GP and the fixed counters
are contiguous. That is not guaranteed on newer Intel platforms or in a
virtualization environment.

Use a counter mask instead of the number of counters, for both the GP
and the fixed counters. For other architectures and older platforms
which don't enumerate a counter mask, use GENMASK_ULL(num_counter - 1, 0)
as the replacement; there is no functional change for them.

The interface to KVM is unchanged: the number of counters is still
passed to KVM. It can be updated later, separately.
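[Editorial illustration, not part of the patch] A minimal user-space
sketch of the conversion pattern this patch applies throughout: walk the
set bits of the counter mask instead of looping from 0 to num_counters,
and keep two derived quantities distinct — the population count (what
x86_pmu_num_counters() returns) and the highest index plus one (what
x86_pmu_max_num_counters() returns). The mask value and the bit tricks
below are stand-ins, not the kernel helpers themselves.

#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l)	(((~0ULL) >> (63 - (h))) & ~((1ULL << (l)) - 1))

int main(void)
{
	/* Hypothetical GP counter mask with a hole: counters 0, 1, 3, 4. */
	uint64_t cntr_mask64 = 0x1b;
	/* Platforms that only report a count fall back to a contiguous
	 * mask, exactly as the patch does for num_counter == 4: */
	uint64_t legacy_mask = GENMASK_ULL(4 - 1, 0);

	/* The old "for (i = 0; i < num_counters; i++)" walk would touch
	 * the missing counter 2; walking set bits skips it, which is what
	 * for_each_set_bit() does in the kernel. */
	for (uint64_t m = cntr_mask64; m; m &= m - 1)
		printf("counter %d present\n", __builtin_ctzll(m));

	printf("num counters: %d\n", __builtin_popcountll(cntr_mask64));
	printf("max index+1:  %d\n", 64 - __builtin_clzll(cntr_mask64));
	printf("legacy mask:  %#llx\n", (unsigned long long)legacy_mask);
	return 0;
}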
Reviewed-by: Andi Kleen
Signed-off-by: Kan Liang
Cc: Sandipan Das
Cc: Ravi Bangoria
Cc: silviazhao
Cc: CodyYao-oc
Reviewed-by: Ian Rogers
---
 arch/x86/events/amd/core.c     |  24 ++---
 arch/x86/events/core.c         |  98 ++++++++++----------
 arch/x86/events/intel/core.c   | 164 ++++++++++++++++-----------------
 arch/x86/events/intel/ds.c     |  19 ++--
 arch/x86/events/intel/knc.c    |   2 +-
 arch/x86/events/intel/p4.c     |  10 +-
 arch/x86/events/intel/p6.c     |   2 +-
 arch/x86/events/perf_event.h   |  47 ++++++++--
 arch/x86/events/zhaoxin/core.c |  12 +--
 9 files changed, 199 insertions(+), 179 deletions(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 1fc4ce44e743..42a9f97a9aed 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -432,7 +432,7 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
 	 * be removed on one CPU at a time AND PMU is disabled
 	 * when we come here
 	 */
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		if (cmpxchg(nb->owners + i, event, NULL) == event)
 			break;
 	}
@@ -499,7 +499,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
 	 * because of successive calls to x86_schedule_events() from
 	 * hw_perf_group_sched_in() without hw_perf_enable()
 	 */
-	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+	for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
 		if (new == -1 || hwc->idx == idx)
 			/* assign free slot, prefer hwc->idx */
 			old = cmpxchg(nb->owners + idx, NULL, event);
@@ -542,7 +542,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
 	/*
 	 * initialize all possible NB constraints
 	 */
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		__set_bit(i, nb->event_constraints[i].idxmsk);
 		nb->event_constraints[i].weight = 1;
 	}
@@ -735,7 +735,7 @@ static void amd_pmu_check_overflow(void)
 	 * counters are always enabled when this function is called and
 	 * ARCH_PERFMON_EVENTSEL_INT is always set.
 	 */
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -755,7 +755,7 @@ static void amd_pmu_enable_all(int added)
 
 	amd_brs_enable_all();
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		/* only activate events which are marked as active */
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -978,7 +978,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 	/* Clear any reserved bits set by buggy microcode */
 	status &= amd_pmu_global_cntr_mask;
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -1313,7 +1313,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.addr_offset		= amd_pmu_addr_offset,
 	.event_map		= amd_pmu_event_map,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= AMD64_NUM_COUNTERS,
+	.cntr_mask64		= GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
 	.add			= amd_pmu_add_event,
 	.del			= amd_pmu_del_event,
 	.cntval_bits		= 48,
@@ -1412,7 +1412,7 @@ static int __init amd_core_pmu_init(void)
 	 */
 	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
 	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
-	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
+	x86_pmu.cntr_mask64	= GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
 
 	/* Check for Performance Monitoring v2 support */
 	if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1422,9 +1422,9 @@ static int __init amd_core_pmu_init(void)
 		x86_pmu.version = 2;
 
 		/* Find the number of available Core PMCs */
-		x86_pmu.num_counters = ebx.split.num_core_pmc;
+		x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
 
-		amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+		amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
 
 		/* Update PMC handling functions */
 		x86_pmu.enable_all = amd_pmu_v2_enable_all;
@@ -1452,12 +1452,12 @@ static int __init amd_core_pmu_init(void)
 	 * even numbered counter that has a consecutive adjacent odd
 	 * numbered counter following it.
 	 */
-	for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+	for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
 		even_ctr_mask |= BIT_ULL(i);
 
 	pair_constraint = (struct event_constraint)
 			  __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
-					     x86_pmu.num_counters / 2, 0,
+					     x86_pmu_max_num_counters(NULL) / 2, 0,
 					     PERF_X86_EVENT_PAIR);
 
 	x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5b0dd07b1ef1..848dbe9cbd0e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -189,29 +189,31 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
-static inline int get_possible_num_counters(void)
+static inline u64 get_possible_counter_mask(void)
 {
-	int i, num_counters = x86_pmu.num_counters;
+	u64 cntr_mask = x86_pmu.cntr_mask64;
+	int i;
 
 	if (!is_hybrid())
-		return num_counters;
+		return cntr_mask;
 
 	for (i = 0; i < x86_pmu.num_hybrid_pmus; i++)
-		num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters);
+		cntr_mask |= x86_pmu.hybrid_pmu[i].cntr_mask64;
 
-	return num_counters;
+	return cntr_mask;
 }
 
 static bool reserve_pmc_hardware(void)
 {
-	int i, num_counters = get_possible_num_counters();
+	u64 cntr_mask = get_possible_counter_mask();
+	int i, end;
 
-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
 		if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
 			goto perfctr_fail;
 	}
 
-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
 		if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
 			goto eventsel_fail;
 	}
@@ -219,13 +221,14 @@ static bool reserve_pmc_hardware(void)
 	return true;
 
 eventsel_fail:
-	for (i--; i >= 0; i--)
+	end = i;
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
 		release_evntsel_nmi(x86_pmu_config_addr(i));
-
-	i = num_counters;
+	i = X86_PMC_IDX_MAX;
 
 perfctr_fail:
-	for (i--; i >= 0; i--)
+	end = i;
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
 		release_perfctr_nmi(x86_pmu_event_addr(i));
 
 	return false;
@@ -233,9 +236,10 @@ static bool reserve_pmc_hardware(void)
 
 static void release_pmc_hardware(void)
 {
-	int i, num_counters = get_possible_num_counters();
+	u64 cntr_mask = get_possible_counter_mask();
+	int i;
 
-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
 		release_perfctr_nmi(x86_pmu_event_addr(i));
 		release_evntsel_nmi(x86_pmu_config_addr(i));
 	}
@@ -248,7 +252,8 @@ static void release_pmc_hardware(void) {}
 
 #endif
 
-bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
+bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
+		     unsigned long *fixed_cntr_mask)
 {
 	u64 val, val_fail = -1, val_new= ~0;
 	int i, reg, reg_fail = -1, ret = 0;
@@ -259,7 +264,7 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
 	 * Check to see if the BIOS enabled any of the counters, if so
 	 * complain and bail.
 	 */
-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
 		reg = x86_pmu_config_addr(i);
 		ret = rdmsrl_safe(reg, &val);
 		if (ret)
@@ -273,12 +278,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
 		}
 	}
 
-	if (num_counters_fixed) {
+	if (*(u64 *)fixed_cntr_mask) {
 		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 		ret = rdmsrl_safe(reg, &val);
 		if (ret)
 			goto msr_fail;
-		for (i = 0; i < num_counters_fixed; i++) {
+		for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
 			if (fixed_counter_disabled(i, pmu))
 				continue;
 			if (val & (0x03ULL << i*4)) {
@@ -679,7 +684,7 @@ void x86_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 		u64 val;
 
@@ -736,7 +741,7 @@ void x86_pmu_enable_all(int added)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 
 		if (!test_bit(idx, cpuc->active_mask))
@@ -975,7 +980,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events);
 
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	int num_counters = hybrid(cpuc->pmu, num_counters);
 	struct event_constraint *c;
 	struct perf_event *e;
 	int n0, i, wmin, wmax, unsched = 0;
@@ -1051,7 +1055,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 	/* slow path */
 	if (i != n) {
-		int gpmax = num_counters;
+		int gpmax = x86_pmu_max_num_counters(cpuc->pmu);
 
 		/*
 		 * Do not allow scheduling of more than half the available
@@ -1072,7 +1076,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		 * the extra Merge events needed by large increment events.
 		 */
 		if (x86_pmu.flags & PMU_FL_PAIR) {
-			gpmax = num_counters - cpuc->n_pair;
+			gpmax -= cpuc->n_pair;
 			WARN_ON(gpmax <= 0);
 		}
 
@@ -1157,12 +1161,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
  */
 static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
 {
-	int num_counters = hybrid(cpuc->pmu, num_counters);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
 	struct perf_event *event;
 	int n, max_count;
 
-	max_count = num_counters + num_counters_fixed;
+	max_count = x86_pmu_num_counters(cpuc->pmu) + x86_pmu_num_counters_fixed(cpuc->pmu);
 
 	/* current number of events already accepted */
 	n = cpuc->n_events;
@@ -1522,13 +1524,13 @@ void perf_event_print_debug(void)
 	u64 pebs, debugctl;
 	int cpu = smp_processor_id();
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-	int num_counters = hybrid(cpuc->pmu, num_counters);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
+	unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+	unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
 	struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
 	unsigned long flags;
 	int idx;
 
-	if (!num_counters)
+	if (!*(u64 *)cntr_mask)
 		return;
 
 	local_irq_save(flags);
@@ -1555,7 +1557,7 @@ void perf_event_print_debug(void)
 	}
 	pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
-	for (idx = 0; idx < num_counters; idx++) {
+	for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
 		rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
 		rdmsrl(x86_pmu_event_addr(idx), pmc_count);
 
@@ -1568,7 +1570,7 @@ void perf_event_print_debug(void)
 		pr_info("CPU#%d: gen-PMC%d left:  %016llx\n",
 			cpu, idx, prev_left);
 	}
-	for (idx = 0; idx < num_counters_fixed; idx++) {
+	for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
 		if (fixed_counter_disabled(idx, cpuc->pmu))
 			continue;
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
@@ -1682,7 +1684,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 	 */
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -2038,18 +2040,15 @@ static void _x86_pmu_read(struct perf_event *event)
 	static_call(x86_pmu_update)(event);
 }
 
-void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
-			  u64 intel_ctrl)
+void x86_pmu_show_pmu_cap(struct pmu *pmu)
 {
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
-	pr_info("... generic registers:      %d\n",     num_counters);
+	pr_info("... generic registers:      %d\n",     x86_pmu_num_counters(pmu));
 	pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
 	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose events:   %lu\n",
-			hweight64((((1ULL << num_counters_fixed) - 1)
-					<< INTEL_PMC_IDX_FIXED) & intel_ctrl));
-	pr_info("... event mask:             %016Lx\n", intel_ctrl);
+	pr_info("... fixed-purpose events:   %d\n",     x86_pmu_num_counters_fixed(pmu));
+	pr_info("... event mask:             %016Lx\n", hybrid(pmu, intel_ctrl));
 }
 
 static int __init init_hw_perf_events(void)
@@ -2086,7 +2085,7 @@ static int __init init_hw_perf_events(void)
 	pmu_check_apic();
 
 	/* sanity check that the hardware exists or is emulated */
-	if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
+	if (!check_hw_exists(&pmu, x86_pmu.cntr_mask, x86_pmu.fixed_cntr_mask))
 		goto out_bad_pmu;
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -2097,14 +2096,14 @@ static int __init init_hw_perf_events(void)
 		quirk->func();
 
 	if (!x86_pmu.intel_ctrl)
-		x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+		x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
 
 	perf_events_lapic_init();
 	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
 
 	unconstrained = (struct event_constraint)
-		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters, 0, 0);
+		__EVENT_CONSTRAINT(0, x86_pmu.cntr_mask64,
+				   0, x86_pmu_num_counters(NULL), 0, 0);
 
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
@@ -2113,11 +2112,8 @@ static int __init init_hw_perf_events(void)
 
 	pmu.attr_update = x86_pmu.attr_update;
 
-	if (!is_hybrid()) {
-		x86_pmu_show_pmu_cap(x86_pmu.num_counters,
-				     x86_pmu.num_counters_fixed,
-				     x86_pmu.intel_ctrl);
-	}
+	if (!is_hybrid())
+		x86_pmu_show_pmu_cap(NULL);
 
 	if (!x86_pmu.read)
 		x86_pmu.read = _x86_pmu_read;
@@ -2481,7 +2477,7 @@ void perf_clear_dirty_counters(void)
 	for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
 		if (i >= INTEL_PMC_IDX_FIXED) {
 			/* Metrics and fake events don't have corresponding HW counters. */
-			if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+			if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
 				continue;
 
 			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
@@ -2983,8 +2979,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 	 * base PMU holds the correct number of counters for P-cores.
 	 */
 	cap->version = x86_pmu.version;
-	cap->num_counters_gp = x86_pmu.num_counters;
-	cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+	cap->num_counters_gp = x86_pmu_num_counters(NULL);
+	cap->num_counters_fixed = x86_pmu_num_counters_fixed(NULL);
 	cap->bit_width_gp = x86_pmu.cntval_bits;
 	cap->bit_width_fixed = x86_pmu.cntval_bits;
 	cap->events_mask = (unsigned int)x86_pmu.events_maskl;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index c27a9f75defb..844fd005ca91 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2874,23 +2874,23 @@ static void intel_pmu_reset(void)
 {
 	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
-	int num_counters = hybrid(cpuc->pmu, num_counters);
+	unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+	unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
 	unsigned long flags;
 	int idx;
 
-	if (!num_counters)
+	if (!*(u64 *)cntr_mask)
 		return;
 
 	local_irq_save(flags);
 
 	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
 
-	for (idx = 0; idx < num_counters; idx++) {
+	for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) {
 		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
 		wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
 	}
-	for (idx = 0; idx < num_counters_fixed; idx++) {
+	for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
 		if (fixed_counter_disabled(idx, cpuc->pmu))
 			continue;
 		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
@@ -2940,8 +2940,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
 	    !guest_pebs_idxs)
 		return;
 
-	for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
-			 INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
+	for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) {
 		event = cpuc->events[bit];
 		if (!event->attr.precise_ip)
 			continue;
@@ -4199,7 +4198,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
 	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[idx];
 
 		arr[idx].msr = x86_pmu_config_addr(idx);
@@ -4217,7 +4216,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
 			arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 	}
 
-	*nr = x86_pmu.num_counters;
+	*nr = x86_pmu_max_num_counters(cpuc->pmu);
 	return arr;
 }
 
@@ -4232,7 +4231,7 @@ static void core_pmu_enable_all(int added)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 
 		if (!test_bit(idx, cpuc->active_mask) ||
@@ -4684,13 +4683,33 @@ static void flip_smm_bit(void *data)
 	}
 }
 
-static void intel_pmu_check_num_counters(int *num_counters,
-					 int *num_counters_fixed,
-					 u64 *intel_ctrl, u64 fixed_mask);
+static void intel_pmu_check_counters_mask(unsigned long *cntr_mask,
+					  unsigned long *fixed_cntr_mask,
+					  u64 *intel_ctrl)
+{
+	unsigned int bit;
+
+	bit = find_last_bit(cntr_mask, X86_PMC_IDX_MAX) + 1;
+	if (bit > INTEL_PMC_MAX_GENERIC) {
+		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+		     bit, INTEL_PMC_MAX_GENERIC);
+		*cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
+	}
+	*intel_ctrl = *cntr_mask;
+
+	bit = find_last_bit(fixed_cntr_mask, X86_PMC_IDX_MAX) + 1;
+	if (bit > INTEL_PMC_MAX_FIXED) {
+		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+		     bit, INTEL_PMC_MAX_FIXED);
+		*fixed_cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
+	}
+
+	*intel_ctrl |= (u64)*fixed_cntr_mask << INTEL_PMC_IDX_FIXED;
+}
 
 static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
-					      int num_counters,
-					      int num_counters_fixed,
+					      u64 cntr_mask,
+					      u64 fixed_cntr_mask,
 					      u64 intel_ctrl);
 
 static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
@@ -4713,11 +4732,10 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
 	if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
 		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
 			    &eax, &ebx, &ecx, &edx);
-		pmu->num_counters = fls(eax);
-		pmu->num_counters_fixed = fls(ebx);
+		pmu->cntr_mask64 = eax;
+		pmu->fixed_cntr_mask64 = ebx;
 	}
 
-
 	if (!intel_pmu_broken_perf_cap()) {
 		/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
 		rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
@@ -4726,12 +4744,12 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
 
 static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
 {
-	intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
-				     &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
-	pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
+	intel_pmu_check_counters_mask(pmu->cntr_mask, pmu->fixed_cntr_mask,
+				      &pmu->intel_ctrl);
+	pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
 	pmu->unconstrained = (struct event_constraint)
-			     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-						0, pmu->num_counters, 0, 0);
+			     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+						0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
 
 	if (pmu->intel_cap.perf_metrics)
 		pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
@@ -4744,8 +4762,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
 		pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;
 
 	intel_pmu_check_event_constraints(pmu->event_constraints,
-					  pmu->num_counters,
-					  pmu->num_counters_fixed,
+					  pmu->cntr_mask64,
+					  pmu->fixed_cntr_mask64,
 					  pmu->intel_ctrl);
 
 	intel_pmu_check_extra_regs(pmu->extra_regs);
@@ -4806,7 +4824,7 @@ static bool init_hybrid_pmu(int cpu)
 
 	intel_pmu_check_hybrid_pmus(pmu);
 
-	if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
+	if (!check_hw_exists(&pmu->pmu, pmu->cntr_mask, pmu->fixed_cntr_mask))
 		return false;
 
 	pr_info("%s PMU driver: ", pmu->name);
@@ -4816,8 +4834,7 @@ static bool init_hybrid_pmu(int cpu)
 
 	pr_cont("\n");
 
-	x86_pmu_show_pmu_cap(pmu->num_counters, pmu->num_counters_fixed,
-			     pmu->intel_ctrl);
+	x86_pmu_show_pmu_cap(&pmu->pmu);
 
 end:
 	cpumask_set_cpu(cpu, &pmu->supported_cpus);
@@ -5955,29 +5972,9 @@ static const struct attribute_group *hybrid_attr_update[] = {
 
 static struct attribute *empty_attrs;
 
-static void intel_pmu_check_num_counters(int *num_counters,
-					 int *num_counters_fixed,
-					 u64 *intel_ctrl, u64 fixed_mask)
-{
-	if (*num_counters > INTEL_PMC_MAX_GENERIC) {
-		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-		     *num_counters, INTEL_PMC_MAX_GENERIC);
-		*num_counters = INTEL_PMC_MAX_GENERIC;
-	}
-	*intel_ctrl = (1ULL << *num_counters) - 1;
-
-	if (*num_counters_fixed > INTEL_PMC_MAX_FIXED) {
-		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-		     *num_counters_fixed, INTEL_PMC_MAX_FIXED);
-		*num_counters_fixed = INTEL_PMC_MAX_FIXED;
-	}
-
-	*intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED;
-}
-
 static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
-					      int num_counters,
-					      int num_counters_fixed,
+					      u64 cntr_mask,
+					      u64 fixed_cntr_mask,
 					      u64 intel_ctrl)
 {
 	struct event_constraint *c;
@@ -6014,10 +6011,9 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
 			 * generic counters
 			 */
 			if (!use_fixed_pseudo_encoding(c->code))
-				c->idxmsk64 |= (1ULL << num_counters) - 1;
+				c->idxmsk64 |= cntr_mask;
 		}
-		c->idxmsk64 &=
-			~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed));
+		c->idxmsk64 &= cntr_mask | (fixed_cntr_mask << INTEL_PMC_IDX_FIXED);
 		c->weight = hweight64(c->idxmsk64);
 	}
 }
@@ -6068,12 +6064,12 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
 		pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id;
 		pmu->name = intel_hybrid_pmu_type_map[bit].name;
 
-		pmu->num_counters = x86_pmu.num_counters;
-		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
-		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
+		pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+		pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
+		pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
 		pmu->unconstrained = (struct event_constraint)
-				     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-							0, pmu->num_counters, 0, 0);
+				     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+							0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
 
 		pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
 		if (pmu->pmu_type & hybrid_small) {
@@ -6186,14 +6182,14 @@ __init int intel_pmu_init(void)
 		x86_pmu = intel_pmu;
 
 	x86_pmu.version			= version;
-	x86_pmu.num_counters		= eax.split.num_counters;
+	x86_pmu.cntr_mask64		= GENMASK_ULL(eax.split.num_counters - 1, 0);
 	x86_pmu.cntval_bits		= eax.split.bit_width;
 	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
 
 	x86_pmu.events_maskl		= ebx.full;
 	x86_pmu.events_mask_len		= eax.split.mask_length;
 
-	x86_pmu.pebs_events_mask	= intel_pmu_pebs_mask(GENMASK_ULL(x86_pmu.num_counters - 1, 0));
+	x86_pmu.pebs_events_mask	= intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
 	x86_pmu.pebs_capable		= PEBS_COUNTER_MASK;
 
 	/*
@@ -6203,12 +6199,10 @@ __init int intel_pmu_init(void)
 	if (version > 1 && version < 5) {
 		int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
 
-		x86_pmu.num_counters_fixed =
-			max((int)edx.split.num_counters_fixed, assume);
-
-		fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
+		x86_pmu.fixed_cntr_mask64 =
+			GENMASK_ULL(max((int)edx.split.num_counters_fixed, assume) - 1, 0);
 	} else if (version >= 5)
-		x86_pmu.num_counters_fixed = fls(fixed_mask);
+		x86_pmu.fixed_cntr_mask64 = fixed_mask;
 
 	if (boot_cpu_has(X86_FEATURE_PDCM)) {
 		u64 capabilities;
@@ -6803,11 +6797,13 @@ __init int intel_pmu_init(void)
 		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
 		intel_pmu_init_glc(&pmu->pmu);
 		if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
-			pmu->num_counters = x86_pmu.num_counters + 2;
-			pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
+			pmu->cntr_mask64 <<= 2;
+			pmu->cntr_mask64 |= 0x3;
+			pmu->fixed_cntr_mask64 <<= 1;
+			pmu->fixed_cntr_mask64 |= 0x1;
 		} else {
-			pmu->num_counters = x86_pmu.num_counters;
-			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+			pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+			pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
 		}
 
 		/*
@@ -6817,15 +6813,16 @@ __init int intel_pmu_init(void)
 		 * mistakenly add extra counters for P-cores. Correct the number of
 		 * counters here.
 		 */
-		if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
-			pmu->num_counters = x86_pmu.num_counters;
-			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+		if ((x86_pmu_num_counters(&pmu->pmu) > 8) || (x86_pmu_num_counters_fixed(&pmu->pmu) > 4)) {
+			pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+			pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
 		}
 
-		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
+		pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
 		pmu->unconstrained = (struct event_constraint)
-				     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-							0, pmu->num_counters, 0, 0);
+				     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+							0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
+
 		pmu->extra_regs = intel_glc_extra_regs;
 
 		/* Initialize Atom core specific PerfMon capabilities.*/
@@ -6892,9 +6889,9 @@ __init int intel_pmu_init(void)
 		 * The constraints may be cut according to the CPUID enumeration
 		 * by inserting the EVENT_CONSTRAINT_END.
 		 */
-		if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED)
-			x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
-		intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1;
+		if (find_last_bit(x86_pmu.fixed_cntr_mask, X86_PMC_IDX_MAX) > INTEL_PMC_MAX_FIXED)
+			x86_pmu.fixed_cntr_mask64 &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
+		intel_v5_gen_event_constraints[find_last_bit(x86_pmu.fixed_cntr_mask, INTEL_PMC_MAX_FIXED) + 1].weight = -1;
 		x86_pmu.event_constraints = intel_v5_gen_event_constraints;
 		pr_cont("generic architected perfmon, ");
 		name = "generic_arch_v5+";
@@ -6921,18 +6918,17 @@ __init int intel_pmu_init(void)
 		x86_pmu.attr_update = hybrid_attr_update;
 	}
 
-	intel_pmu_check_num_counters(&x86_pmu.num_counters,
-				     &x86_pmu.num_counters_fixed,
-				     &x86_pmu.intel_ctrl,
-				     (u64)fixed_mask);
+	intel_pmu_check_counters_mask(x86_pmu.cntr_mask,
+				      x86_pmu.fixed_cntr_mask,
+				      &x86_pmu.intel_ctrl);
 
 	/* AnyThread may be deprecated on arch perfmon v5 or later */
 	if (x86_pmu.intel_cap.anythread_deprecated)
 		x86_pmu.format_attrs = intel_arch_formats_attr;
 
 	intel_pmu_check_event_constraints(x86_pmu.event_constraints,
-					  x86_pmu.num_counters,
-					  x86_pmu.num_counters_fixed,
+					  x86_pmu.cntr_mask64,
+					  x86_pmu.fixed_cntr_mask64,
 					  x86_pmu.intel_ctrl);
 	/*
	 * Access LBR MSR may cause #GP under certain circumstances.
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index f6105b8dcf87..6f834a7d852a 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1138,7 +1138,6 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
 	int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
 	u64 threshold;
 	int reserved;
 
@@ -1146,7 +1145,7 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 		return;
 
 	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
-		reserved = max_pebs_events + num_counters_fixed;
+		reserved = max_pebs_events + x86_pmu_max_num_counters_fixed(cpuc->pmu);
 	else
 		reserved = max_pebs_events;
 
@@ -2172,8 +2171,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 	mask = x86_pmu.pebs_events_mask;
 	size = max_pebs_events;
 	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
-		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
-		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
+		mask |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
+		size = INTEL_PMC_IDX_FIXED + x86_pmu_max_num_counters_fixed(NULL);
 	}
 
 	if (unlikely(base >= top)) {
@@ -2269,11 +2268,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 {
 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event;
 	void *base, *at, *top;
-	int bit, size;
+	int bit;
 	u64 mask;
 
 	if (!x86_pmu.pebs_active)
@@ -2285,11 +2283,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
 	mask = hybrid(cpuc->pmu, pebs_events_mask) |
-	       (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
-	size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
+	       (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
 
 	if (unlikely(base >= top)) {
-		intel_pmu_pebs_event_update_no_drain(cpuc, size);
+		intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
 		return;
 	}
 
@@ -2299,11 +2296,11 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 		pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
 		pebs_status &= mask;
 
-		for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
+		for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX)
 			counts[bit]++;
 	}
 
-	for_each_set_bit(bit, (unsigned long *)&mask, size) {
+	for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
 		if (counts[bit] == 0)
 			continue;
 
diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c
index 618001c208e8..034a1f6a457c 100644
--- a/arch/x86/events/intel/knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -303,7 +303,7 @@ static const struct x86_pmu knc_pmu __initconst = {
 	.apic			= 1,
 	.max_period		= (1ULL << 39) - 1,
 	.version		= 0,
-	.num_counters		= 2,
+	.cntr_mask64		= 0x3,
 	.cntval_bits		= 40,
 	.cntval_mask		= (1ULL << 40) - 1,
 	.get_event_constraints	= x86_get_event_constraints,
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index 35936188db01..844bc4fc4724 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -919,7 +919,7 @@ static void p4_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[idx];
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -998,7 +998,7 @@ static void p4_pmu_enable_all(int added)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[idx];
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -1040,7 +1040,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = this_cpu_ptr(&cpu_hw_events);
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		int overflow;
 
 		if (!test_bit(idx, cpuc->active_mask)) {
@@ -1353,7 +1353,7 @@ static __initconst const struct x86_pmu p4_pmu = {
 	 * though leave it restricted at moment assuming
 	 * HT is on
 	 */
-	.num_counters		= ARCH_P4_MAX_CCCR,
+	.cntr_mask64		= GENMASK_ULL(ARCH_P4_MAX_CCCR - 1, 0),
 	.apic			= 1,
 	.cntval_bits		= ARCH_P4_CNTRVAL_BITS,
 	.cntval_mask		= ARCH_P4_CNTRVAL_MASK,
@@ -1395,7 +1395,7 @@ __init int p4_pmu_init(void)
 	 *
 	 * Solve this by zero'ing out the registers to mimic a reset.
 	 */
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		reg = x86_pmu_config_addr(i);
 		wrmsrl_safe(reg, 0ULL);
 	}
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
index 408879b0c0d4..a6cffb4f4ef5 100644
--- a/arch/x86/events/intel/p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -214,7 +214,7 @@ static __initconst const struct x86_pmu p6_pmu = {
 	.apic			= 1,
 	.max_period		= (1ULL << 31) - 1,
 	.version		= 0,
-	.num_counters		= 2,
+	.cntr_mask64		= 0x3,
 	/*
 	 * Events have 40 bits implemented. However they are designed such
 	 * that bits [32-39] are sign extensions of bit 31. As such the
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 0e411539f88a..b3214d6e8f4c 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -685,8 +685,14 @@ struct x86_hybrid_pmu {
 	union perf_capabilities		intel_cap;
 	u64				intel_ctrl;
 	u64				pebs_events_mask;
-	int				num_counters;
-	int				num_counters_fixed;
+	union {
+		u64		cntr_mask64;
+		unsigned long	cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
+	union {
+		u64		fixed_cntr_mask64;
+		unsigned long	fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
 	struct event_constraint		unconstrained;
 
 	u64				hw_cache_event_ids
@@ -774,8 +780,14 @@ struct x86_pmu {
 	int		(*rdpmc_index)(int index);
 	u64		(*event_map)(int);
 	int		max_events;
-	int		num_counters;
-	int		num_counters_fixed;
+	union {
+		u64		cntr_mask64;
+		unsigned long	cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
+	union {
+		u64		fixed_cntr_mask64;
+		unsigned long	fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
 	int		cntval_bits;
 	u64		cntval_mask;
 	union {
@@ -1125,8 +1137,8 @@ static inline int x86_pmu_rdpmc_index(int index)
 	return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
 }
 
-bool check_hw_exists(struct pmu *pmu, int num_counters,
-		     int num_counters_fixed);
+bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
+		     unsigned long *fixed_cntr_mask);
 
 int x86_add_exclusive(unsigned int what);
 
@@ -1197,8 +1209,27 @@ void x86_pmu_enable_event(struct perf_event *event);
 
 int x86_pmu_handle_irq(struct pt_regs *regs);
 
-void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
-			  u64 intel_ctrl);
+void x86_pmu_show_pmu_cap(struct pmu *pmu);
+
+static inline int x86_pmu_num_counters(struct pmu *pmu)
+{
+	return hweight64(hybrid(pmu, cntr_mask64));
+}
+
+static inline int x86_pmu_max_num_counters(struct pmu *pmu)
+{
+	return find_last_bit(hybrid(pmu, cntr_mask), X86_PMC_IDX_MAX) + 1;
+}
+
+static inline int x86_pmu_num_counters_fixed(struct pmu *pmu)
+{
+	return hweight64(hybrid(pmu, fixed_cntr_mask64));
+}
+
+static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu)
+{
+	return find_last_bit(hybrid(pmu, fixed_cntr_mask), INTEL_PMC_MAX_FIXED) + 1;
+}
 
 extern struct event_constraint emptyconstraint;
 
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
index 3e9acdaeed1e..2fd9b0cf9a5e 100644
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -530,13 +530,13 @@ __init int zhaoxin_pmu_init(void)
 	pr_info("Version check pass!\n");
 
 	x86_pmu.version		= version;
-	x86_pmu.num_counters	= eax.split.num_counters;
+	x86_pmu.cntr_mask64	= GENMASK_ULL(eax.split.num_counters - 1, 0);
 	x86_pmu.cntval_bits	= eax.split.bit_width;
 	x86_pmu.cntval_mask	= (1ULL << eax.split.bit_width) - 1;
 	x86_pmu.events_maskl	= ebx.full;
 	x86_pmu.events_mask_len	= eax.split.mask_length;
 
-	x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
+	x86_pmu.fixed_cntr_mask64 = GENMASK_ULL(edx.split.num_counters_fixed - 1, 0);
 	x86_add_quirk(zhaoxin_arch_events_quirk);
 
 	switch (boot_cpu_data.x86) {
@@ -604,13 +604,13 @@ __init int zhaoxin_pmu_init(void)
 		return -ENODEV;
 	}
 
-	x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
-	x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+	x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
+	x86_pmu.intel_ctrl |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
-			c->weight += x86_pmu.num_counters;
+			c->idxmsk64 |= x86_pmu.cntr_mask64;
+			c->weight += x86_pmu_num_counters(NULL);
 		}
 	}
 
-- 
2.35.1
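[Editorial illustration, not part of the patch] The perf_event.h hunk
above stores each mask in an anonymous union so the same bits can be read
either as a u64 (for mask arithmetic such as building intel_ctrl) or as
an unsigned long array (for bitmap helpers like for_each_set_bit() and
find_last_bit()). Below is a standalone sketch of that layout with
simplified stand-in macros; it assumes a 64-bit little-endian target, as
on x86-64, where the two views alias the same bits.

#include <stdio.h>
#include <stdint.h>

#define X86_PMC_IDX_MAX		64
#define BITS_TO_LONGS(n)	(((n) + 8 * sizeof(unsigned long) - 1) / \
				 (8 * sizeof(unsigned long)))

struct pmu_counters {
	union {
		uint64_t	cntr_mask64;
		unsigned long	cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	};
};

int main(void)
{
	struct pmu_counters p = { .cntr_mask64 = 0x1b };

	/* u64 view: cheap mask algebra, e.g. seeding intel_ctrl. */
	uint64_t intel_ctrl = p.cntr_mask64;

	/* bitmap view: what for_each_set_bit() iterates over in the kernel. */
	for (unsigned int i = 0; i < X86_PMC_IDX_MAX; i++)
		if (p.cntr_mask[i / (8 * sizeof(unsigned long))] &
		    (1UL << (i % (8 * sizeof(unsigned long)))))
			printf("bit %u set\n", i);

	printf("intel_ctrl = %#llx\n", (unsigned long long)intel_ctrl);
	return 0;
}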
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V2 03/13] perf/x86: Add Lunar Lake and Arrow Lake support
Date: Tue, 25 Jun 2024 11:22:46 -0700
Message-Id: <20240625182256.291914-4-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

From the PMU's perspective, Lunar Lake and Arrow Lake are similar to the
previous generation Meteor Lake. Both are hybrid platforms, with e-cores
and p-cores. The key differences are:

- The e-core supports 3 new fixed counters.
- The p-core supports an updated PEBS Data Source format.
- More GP counters (updated event constraint table).
- New Architectural performance monitoring V6 (new Perfmon MSR
  aliasing, umask2, eq).
- New PEBS format V6 (counters snapshotting group).
- New RDPMC metrics clear mode.

This patch enables the legacy features, the 3 new fixed counters, and
the updated event constraint table. The new PEBS data source format, the
architectural performance monitoring V6, the PEBS format V6, and the new
RDPMC metrics clear mode are supported in the following patches.
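[Editorial illustration, not part of the patch] The three new e-core
fixed counters land in the diff below as FIXED_EVENT_CONSTRAINT()
entries in intel_skt_event_constraints[]. The sketch here shows, in
simplified form, what such a table expresses: a mapping from an
event/umask encoding to the one fixed counter allowed to count it. The
struct and lookup are invented for illustration; the encodings and
counter indices are taken from the patch.

#include <stdio.h>
#include <stdint.h>

struct fixed_constraint {
	uint16_t	code;	/* event | (umask << 8) */
	int		idx;	/* permitted fixed counter index */
	const char	*name;
};

static const struct fixed_constraint skt_fixed[] = {
	{ 0x00c0, 0, "INST_RETIRED.ANY" },
	{ 0x003c, 1, "CPU_CLK_UNHALTED.CORE" },
	{ 0x0300, 2, "pseudo CPU_CLK_UNHALTED.REF" },
	{ 0x0073, 4, "TOPDOWN_BAD_SPECULATION.ALL" },	/* new on the e-core */
	{ 0x019c, 5, "TOPDOWN_FE_BOUND.ALL" },		/* new on the e-core */
	{ 0x02c2, 6, "TOPDOWN_RETIRING.ALL" },		/* new on the e-core */
};

int main(void)
{
	/* Scheduling such an event reduces to finding its row and taking
	 * the single permitted fixed counter. */
	uint16_t wanted = 0x0073;

	for (unsigned int i = 0; i < sizeof(skt_fixed) / sizeof(skt_fixed[0]); i++)
		if (skt_fixed[i].code == wanted)
			printf("%s -> fixed counter %d\n",
			       skt_fixed[i].name, skt_fixed[i].idx);
	return 0;
}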
Reviewed-by: Andi Kleen
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 arch/x86/events/intel/core.c      | 117 ++++++++++++++++++++++++++++++
 arch/x86/events/intel/ds.c        |  24 ++++++
 arch/x86/events/perf_event.h      |   2 +
 arch/x86/include/asm/perf_event.h |   4 +
 4 files changed, 147 insertions(+)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 844fd005ca91..e998e3ef8c1c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -220,6 +220,17 @@ static struct event_constraint intel_grt_event_constraints[] __read_mostly = {
 	EVENT_CONSTRAINT_END
 };
 
+static struct event_constraint intel_skt_event_constraints[] __read_mostly = {
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* pseudo CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x013c, 2),	/* CPU_CLK_UNHALTED.REF_TSC_P */
+	FIXED_EVENT_CONSTRAINT(0x0073, 4),	/* TOPDOWN_BAD_SPECULATION.ALL */
+	FIXED_EVENT_CONSTRAINT(0x019c, 5),	/* TOPDOWN_FE_BOUND.ALL */
+	FIXED_EVENT_CONSTRAINT(0x02c2, 6),	/* TOPDOWN_RETIRING.ALL */
+	EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_skl_event_constraints[] = {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
@@ -370,6 +381,55 @@ static struct extra_reg intel_rwc_extra_regs[] __read_mostly = {
 	EVENT_EXTRA_END
 };
 
+static struct event_constraint intel_lnc_event_constraints[] = {
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x0100, 0),	/* INST_RETIRED.PREC_DIST */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x013c, 2),	/* CPU_CLK_UNHALTED.REF_TSC_P */
+	FIXED_EVENT_CONSTRAINT(0x0400, 3),	/* SLOTS */
+	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
+	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
+	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
+	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
+	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
+	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
+	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
+	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),
+
+	INTEL_UEVENT_CONSTRAINT(0x0148, 0x4),
+	INTEL_UEVENT_CONSTRAINT(0x0175, 0x4),
+
+	INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff),
+	INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff),
+	/*
+	 * Generally event codes < 0x90 are restricted to counters 0-3.
+	 * The 0x2E and 0x3C are exception, which has no restriction.
+	 */
+	INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf),
+
+	INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf),
+	INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf),
+	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
+	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
+	INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
+	INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
+	INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1),
+	INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8),
+	INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3),
+	INTEL_EVENT_CONSTRAINT(0xce, 0x1),
+
+	INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
+	/*
+	 * Generally event codes >= 0x90 are likely to have no restrictions.
+	 * The exception are defined as above.
+ */ + INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0x3ff), + + EVENT_CONSTRAINT_END +}; + + EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=3D0x0b,umask=3D0x10,ldlat=3D3= "); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=3D0xcd,umask=3D0x1,ldlat=3D3"= ); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=3D0xcd,umask=3D0x2"); @@ -5790,6 +5850,23 @@ static struct attribute *adl_hybrid_events_attrs[] = =3D { NULL, }; =20 +EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_lnl, "event=3D0x= c2,umask=3D0x02;event=3D0x00,umask=3D0x80", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_lnl, "event=3D0x= 9c,umask=3D0x01;event=3D0x00,umask=3D0x82", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_lnl, "event=3D0x= a4,umask=3D0x02;event=3D0x00,umask=3D0x83", hybrid_big_small); + +static struct attribute *lnl_hybrid_events_attrs[] =3D { + EVENT_PTR(slots_adl), + EVENT_PTR(td_retiring_lnl), + EVENT_PTR(td_bad_spec_adl), + EVENT_PTR(td_fe_bound_lnl), + EVENT_PTR(td_be_bound_lnl), + EVENT_PTR(td_heavy_ops_adl), + EVENT_PTR(td_br_mis_adl), + EVENT_PTR(td_fetch_lat_adl), + EVENT_PTR(td_mem_bound_adl), + NULL +}; + /* Must be in IDX order */ EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=3D0xd0,umask= =3D0x5,ldlat=3D3;event=3D0xcd,umask=3D0x1,ldlat=3D3", hybrid_big_small); EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=3D0xd0,umask= =3D0x6;event=3D0xcd,umask=3D0x2", hybrid_big_small); @@ -6139,6 +6216,21 @@ static __always_inline void intel_pmu_init_grt(struc= t pmu *pmu) intel_pmu_ref_cycles_ext(); } =20 +static __always_inline void intel_pmu_init_lnc(struct pmu *pmu) +{ + intel_pmu_init_glc(pmu); + hybrid(pmu, event_constraints) =3D intel_lnc_event_constraints; + hybrid(pmu, pebs_constraints) =3D intel_lnc_pebs_event_constraints; + hybrid(pmu, extra_regs) =3D intel_rwc_extra_regs; +} + +static __always_inline void intel_pmu_init_skt(struct pmu *pmu) +{ + intel_pmu_init_grt(pmu); + hybrid(pmu, event_constraints) =3D intel_skt_event_constraints; + hybrid(pmu, extra_regs) =3D intel_cmt_extra_regs; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr =3D &empty_attrs; @@ -6864,6 +6956,31 @@ __init int intel_pmu_init(void) name =3D "meteorlake_hybrid"; break; =20 + case INTEL_FAM6_LUNARLAKE_M: + case INTEL_FAM6_ARROWLAKE: + intel_pmu_init_hybrid(hybrid_big_small); + + x86_pmu.get_event_constraints =3D mtl_get_event_constraints; + x86_pmu.hw_config =3D adl_hw_config; + + td_attr =3D lnl_hybrid_events_attrs; + mem_attr =3D mtl_hybrid_mem_attrs; + tsx_attr =3D adl_hybrid_tsx_attrs; + extra_attr =3D boot_cpu_has(X86_FEATURE_RTM) ? 
+ mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities.*/ + pmu =3D &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + intel_pmu_init_lnc(&pmu->pmu); + + /* Initialize Atom core specific PerfMon capabilities.*/ + pmu =3D &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + intel_pmu_init_skt(&pmu->pmu); + + pr_cont("Lunarlake Hybrid events, "); + name =3D "lunarlake_hybrid"; + break; + default: switch (x86_pmu.version) { case 1: diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 6f834a7d852a..79e23dec6714 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1086,6 +1086,30 @@ struct event_constraint intel_glc_pebs_event_constra= ints[] =3D { EVENT_CONSTRAINT_END }; =20 +struct event_constraint intel_lnc_pebs_event_constraints[] =3D { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PRE= C_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), + + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED= .STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED= .STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED= .LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED= .SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED= .SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED= .ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED= .ALL_STORES */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), + + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), + + /* + * Everything else is handled by PMU_FL_PEBS_ALL, because we + * need the full constraints from the main table. 
+ */ + + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *pebs_constraints =3D hybrid(event->pmu, pebs_con= straints); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index b3214d6e8f4c..3c781dabce76 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1582,6 +1582,8 @@ extern struct event_constraint intel_icl_pebs_event_c= onstraints[]; =20 extern struct event_constraint intel_glc_pebs_event_constraints[]; =20 +extern struct event_constraint intel_lnc_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); =20 void intel_pmu_pebs_add(struct perf_event *event); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_= event.h index 7f1e17250546..400c909b8658 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -307,6 +307,10 @@ struct x86_pmu_capability { #define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) #define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) =20 +/* TOPDOWN_BAD_SPECULATION.ALL: fixed counter 4 (Atom only) */ +/* TOPDOWN_FE_BOUND.ALL: fixed counter 5 (Atom only) */ +/* TOPDOWN_RETIRING.ALL: fixed counter 6 (Atom only) */ + static inline bool use_fixed_pseudo_encoding(u64 code) { return !(code & 0xff); --=20 2.35.1 From nobody Thu Dec 18 20:17:35 2025 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7FBD417995A for ; Tue, 25 Jun 2024 18:22:35 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.19 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719339757; cv=none; b=D29c+hPrY7iZMG5Ymgq20C5yLuqqL8y4XlONXYESKSRpWwEqqc7RMeWaYkwui2j6wyKyFPzhEpprs7QT8+c4e6KmJnQ8d1BiryCvmU3EcXMn5/1SXT2Z7cAA3841GHQH9yRbIrRbEWAJ/d4RfgyaM1yyh+jpJ6dkbunBBtj+0ws= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1719339757; c=relaxed/simple; bh=jioh27lFNyL/U9xa3tsB0eU4SRcLEr+62WXb7dPpRDY=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=QCTjN4sUa8ntVPtBMo2QIRTl8uovnnvBg/uHh1ooELf923MZx30lKc5adCU2TX7NbqV7wdShSrbGeqPCeNYMNjPJvMDE6/nHFGG+9ajO0eG8B2SCc52C0DN2OsVTzUQy8a9z1urFwlH3sfU8Ra0gUYOAhrqiQmRT7wAfz3XCM7c= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.intel.com; spf=none smtp.mailfrom=linux.intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=YP2XCJzN; arc=none smtp.client-ip=198.175.65.19 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.intel.com Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=linux.intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="YP2XCJzN" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1719339755; x=1750875755; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=jioh27lFNyL/U9xa3tsB0eU4SRcLEr+62WXb7dPpRDY=; b=YP2XCJzNYlNLBoRjnGqCaUxQ+pM+IeRTcAv+YEFATPTGe47rdYevLDn2 TdaItbZWI2jBQxC8djwil301XNa5Edu/mMNwSAP+RuKx7psw9qXgOyCpi ZIrpsYGqZB64byBvQDE2YetK1lnE0fXnCONW2qAQlMgXS6KWsExE0wemZ 
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V2 04/13] perf/x86/intel: Rename model-specific pebs_latency_data functions
Date: Tue, 25 Jun 2024 11:22:47 -0700
Message-Id: <20240625182256.291914-5-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

The model-specific pebs_latency_data functions of ADL and MTL use "small"
as a suffix to indicate the e-core. The suffix is too generic for a
model-specific function: it does not map the function to a specific uarch,
which would ease development and maintenance. Use the abbreviation of the
uarch to rename the model-specific functions.
Suggested-by: Peter Zijlstra (Intel)
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 arch/x86/events/intel/core.c |  8 ++++----
 arch/x86/events/intel/ds.c   | 20 ++++++++++----------
 arch/x86/events/perf_event.h |  4 ++--
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index e998e3ef8c1c..ca46c5c31f78 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6509,7 +6509,7 @@ __init int intel_pmu_init(void)
 	case INTEL_FAM6_ATOM_GRACEMONT:
 		intel_pmu_init_grt(NULL);
 		intel_pmu_pebs_data_source_grt();
-		x86_pmu.pebs_latency_data = adl_latency_data_small;
+		x86_pmu.pebs_latency_data = adl_latency_data_grt;
 		x86_pmu.get_event_constraints = tnt_get_event_constraints;
 		td_attr = tnt_events_attrs;
 		mem_attr = grt_mem_attrs;
@@ -6523,7 +6523,7 @@ __init int intel_pmu_init(void)
 		intel_pmu_init_grt(NULL);
 		x86_pmu.extra_regs = intel_cmt_extra_regs;
 		intel_pmu_pebs_data_source_cmt();
-		x86_pmu.pebs_latency_data = mtl_latency_data_small;
+		x86_pmu.pebs_latency_data = mtl_latency_data_cmt;
 		x86_pmu.get_event_constraints = cmt_get_event_constraints;
 		td_attr = cmt_events_attrs;
 		mem_attr = grt_mem_attrs;
@@ -6874,7 +6874,7 @@ __init int intel_pmu_init(void)
 		 */
 		intel_pmu_init_hybrid(hybrid_big_small);
 
-		x86_pmu.pebs_latency_data = adl_latency_data_small;
+		x86_pmu.pebs_latency_data = adl_latency_data_grt;
 		x86_pmu.get_event_constraints = adl_get_event_constraints;
 		x86_pmu.hw_config = adl_hw_config;
 		x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type;
@@ -6931,7 +6931,7 @@ __init int intel_pmu_init(void)
 	case INTEL_FAM6_METEORLAKE_L:
 		intel_pmu_init_hybrid(hybrid_big_small);
 
-		x86_pmu.pebs_latency_data = mtl_latency_data_small;
+		x86_pmu.pebs_latency_data = mtl_latency_data_cmt;
 		x86_pmu.get_event_constraints = mtl_get_event_constraints;
 		x86_pmu.hw_config = adl_hw_config;
 
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 79e23dec6714..8a11f72a22b6 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -257,8 +257,8 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
 }
 
 /* Retrieve the latency data for e-core of ADL */
-static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
-				    u8 dse, bool tlb, bool lock, bool blk)
+static u64 __adl_latency_data_grt(struct perf_event *event, u64 status,
+				  u8 dse, bool tlb, bool lock, bool blk)
 {
 	u64 val;
 
@@ -277,27 +277,27 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
 	return val;
 }
 
-u64 adl_latency_data_small(struct perf_event *event, u64 status)
+u64 adl_latency_data_grt(struct perf_event *event, u64 status)
 {
 	union intel_x86_pebs_dse dse;
 
 	dse.val = status;
 
-	return __adl_latency_data_small(event, status, dse.ld_dse,
-					dse.ld_locked, dse.ld_stlb_miss,
-					dse.ld_data_blk);
+	return __adl_latency_data_grt(event, status, dse.ld_dse,
+				      dse.ld_locked, dse.ld_stlb_miss,
+				      dse.ld_data_blk);
 }
 
 /* Retrieve the latency data for e-core of MTL */
-u64 mtl_latency_data_small(struct perf_event *event, u64 status)
+u64 mtl_latency_data_cmt(struct perf_event *event, u64 status)
 {
 	union intel_x86_pebs_dse dse;
 
 	dse.val = status;
 
-	return __adl_latency_data_small(event, status, dse.mtl_dse,
-					dse.mtl_stlb_miss, dse.mtl_locked,
-					dse.mtl_fwd_blk);
+	return __adl_latency_data_grt(event, status, dse.mtl_dse,
+				      dse.mtl_stlb_miss, dse.mtl_locked,
+				      dse.mtl_fwd_blk);
 }
 
 static u64 load_latency_data(struct perf_event *event, u64 status)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 3c781dabce76..e9374b4360d4 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1548,9 +1548,9 @@ void intel_pmu_disable_bts(void);
 
 int intel_pmu_drain_bts_buffer(void);
 
-u64 adl_latency_data_small(struct perf_event *event, u64 status);
+u64 adl_latency_data_grt(struct perf_event *event, u64 status);
 
-u64 mtl_latency_data_small(struct perf_event *event, u64 status);
+u64 mtl_latency_data_cmt(struct perf_event *event, u64 status);
 
 extern struct event_constraint intel_core2_pebs_event_constraints[];
 
-- 
2.35.1

From nobody Thu Dec 18 20:17:35 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V2 05/13] perf/x86/intel: Support new data source for Lunar Lake
Date: Tue, 25 Jun 2024 11:22:48 -0700
Message-Id: <20240625182256.291914-6-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

A new PEBS data source format is introduced for the p-core of Lunar Lake.
The data source field is extended to 8 bits with new encodings.

A new layout is introduced into the union intel_x86_pebs_dse. Introduce
lnl_latency_data() to parse the new format. Enlarge pebs_data_source[]
accordingly to include the new encodings.

Only the mem load and mem store events can generate the data source.
Introduce INTEL_HYBRID_LDLAT_CONSTRAINT and INTEL_HYBRID_STLAT_CONSTRAINT
to mark them.

Add two new bits for the new cache-related data sources, L2_MHB and MSC.
L2_MHB is short for the L2 Miss Handling Buffer, which is similar to the
LFB (Line Fill Buffer) but tracks L2 cache misses. MSC stands for the
memory-side cache.

Reviewed-by: Andi Kleen
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 arch/x86/events/intel/core.c    |  2 +
 arch/x86/events/intel/ds.c      | 88 ++++++++++++++++++++++++++++++-
 arch/x86/events/perf_event.h    | 16 +++++-
 include/uapi/linux/perf_event.h |  6 ++-
 4 files changed, 107 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ca46c5c31f78..ab61b9ef677a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6960,6 +6960,7 @@ __init int intel_pmu_init(void)
 	case INTEL_FAM6_ARROWLAKE:
 		intel_pmu_init_hybrid(hybrid_big_small);
 
+		x86_pmu.pebs_latency_data = lnl_latency_data;
 		x86_pmu.get_event_constraints = mtl_get_event_constraints;
 		x86_pmu.hw_config = adl_hw_config;
 
@@ -6977,6 +6978,7 @@ __init int intel_pmu_init(void)
 		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
 		intel_pmu_init_skt(&pmu->pmu);
 
+		intel_pmu_pebs_data_source_lnl();
 		pr_cont("Lunarlake Hybrid events, ");
 		name = "lunarlake_hybrid";
 		break;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8a11f72a22b6..ce7e98409f29 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -63,6 +63,15 @@ union intel_x86_pebs_dse {
 		unsigned int mtl_fwd_blk:1;
 		unsigned int ld_reserved4:24;
 	};
+	struct {
+		unsigned int lnc_dse:8;
+		unsigned int ld_reserved5:2;
+		unsigned int lnc_stlb_miss:1;
+		unsigned int lnc_locked:1;
+		unsigned int lnc_data_blk:1;
+		unsigned int lnc_addr_blk:1;
+		unsigned int ld_reserved6:18;
+	};
 };
 
 
@@ -77,7 +86,7 @@ union intel_x86_pebs_dse {
 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
 
 /* Version for Sandy Bridge and later */
-static u64 pebs_data_source[] = {
+static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
 	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
 	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
@@ -173,6 +182,40 @@ void __init intel_pmu_pebs_data_source_cmt(void)
 	__intel_pmu_pebs_data_source_cmt(pebs_data_source);
 }
 
+/* Version for Lion Cove and later */
+static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
+	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),	/* 0x00: ukn L3 */
+	OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE),	/* 0x01: L1 hit */
+	OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE),	/* 0x02: L1 hit */
+	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),	/* 0x03: LFB/L1 Miss Handling Buffer hit */
+	0,							/* 0x04: Reserved */
+	OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE),	/* 0x05: L2 Hit */
+	OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE),			/* 0x06: L2 Miss Handling Buffer Hit */
+	0,							/* 0x07: Reserved */
+	OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE),	/* 0x08: L3 Hit */
+	0,							/* 0x09: Reserved */
+	0,							/* 0x0a: Reserved */
+	0,							/* 0x0b: Reserved */
+	OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD),	/* 0x0c: L3 Hit Snoop Fwd */
+	OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM),	/* 0x0d: L3 Hit Snoop HitM */
+	0,							/* 0x0e: Reserved */
+	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM),	/* 0x0f: L3 Miss Snoop HitM */
+	OP_LH | LEVEL(MSC) | P(SNOOP, NONE),			/* 0x10: Memory-side Cache Hit */
+	OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE),	/* 0x11: Local Memory Hit */
+};
+
+void __init intel_pmu_pebs_data_source_lnl(void)
+{
+	u64 *data_source;
+
+	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
+	memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));
+
+	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
+	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+	__intel_pmu_pebs_data_source_cmt(data_source);
+}
+
 static u64 precise_store_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -264,7 +307,7 @@ static u64 __adl_latency_data_grt(struct perf_event *event, u64 status,
 
 	WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
 
-	dse &= PERF_PEBS_DATA_SOURCE_MASK;
+	dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
 	val = hybrid_var(event->pmu, pebs_data_source)[dse];
 
 	pebs_set_tlb_lock(&val, tlb, lock);
@@ -300,6 +343,45 @@ u64 mtl_latency_data_cmt(struct perf_event *event, u64 status)
 				      dse.mtl_fwd_blk);
 }
 
+u64 lnl_latency_data(struct perf_event *event, u64 status)
+{
+	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+	union intel_x86_pebs_dse dse;
+	union perf_mem_data_src src;
+	u64 val;
+
+	if (pmu->pmu_type == hybrid_small)
+		return mtl_latency_data_cmt(event, status);
+
+	dse.val = status;
+
+	/* LNC core latency data */
+	val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
+	if (!val)
+		val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
+
+	if (dse.lnc_stlb_miss)
+		val |= P(TLB, MISS) | P(TLB, L2);
+	else
+		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+	if (dse.lnc_locked)
+		val |= P(LOCK, LOCKED);
+
+	if (dse.lnc_data_blk)
+		val |= P(BLK, DATA);
+	if (dse.lnc_addr_blk)
+		val |= P(BLK, ADDR);
+	if (!dse.lnc_data_blk && !dse.lnc_addr_blk)
+		val |= P(BLK, NA);
+
+	src.val = val;
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+		src.mem_op = P(OP, STORE);
+
+	return src.val;
+}
+
 static u64 load_latency_data(struct perf_event *event, u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -1090,6 +1172,8 @@ struct event_constraint intel_lnc_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
 
+	INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff),
+	INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index e9374b4360d4..0d333bb9c8f4 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -476,6 +476,14 @@ struct cpu_hw_events {
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)
 
+#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW)
+
+#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW)
+
 /* Event constraint, but match on all event flags too. */
 #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
 	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -655,8 +663,10 @@ enum {
 	x86_lbr_exclusive_max,
 };
 
-#define PERF_PEBS_DATA_SOURCE_MAX	0x10
+#define PERF_PEBS_DATA_SOURCE_MAX	0x100
 #define PERF_PEBS_DATA_SOURCE_MASK	(PERF_PEBS_DATA_SOURCE_MAX - 1)
+#define PERF_PEBS_DATA_SOURCE_GRT_MAX	0x10
+#define PERF_PEBS_DATA_SOURCE_GRT_MASK	(PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
 
 enum hybrid_cpu_type {
 	HYBRID_INTEL_NONE,
@@ -1552,6 +1562,8 @@ u64 adl_latency_data_grt(struct perf_event *event, u64 status);
 
 u64 mtl_latency_data_cmt(struct perf_event *event, u64 status);
 
+u64 lnl_latency_data(struct perf_event *event, u64 status);
+
 extern struct event_constraint intel_core2_pebs_event_constraints[];
 
 extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1673,6 +1685,8 @@ void intel_pmu_pebs_data_source_mtl(void);
 
 void intel_pmu_pebs_data_source_cmt(void);
 
+void intel_pmu_pebs_data_source_lnl(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 3a64499b0f5d..4842c36fdf80 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1349,12 +1349,14 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
-/* 5-0x7 available */
+#define PERF_MEM_LVLNUM_L2_MHB	0x05 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC	0x06 /* Memory-side Cache */
+/* 0x7 available */
 #define PERF_MEM_LVLNUM_UNC	0x08 /* Uncached */
 #define PERF_MEM_LVLNUM_CXL	0x09 /* CXL */
 #define PERF_MEM_LVLNUM_IO	0x0a /* I/O */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
+#define PERF_MEM_LVLNUM_LFB	0x0c /* LFB / L1 Miss Handling Buffer */
 #define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
 #define PERF_MEM_LVLNUM_PMEM	0x0e /* PMEM */
 #define PERF_MEM_LVLNUM_NA	0x0f /* N/A */
-- 
2.35.1

From nobody Thu Dec 18 20:17:35 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang, Dapeng Mi
Subject: [PATCH V2 06/13] perf/x86: Add config_mask to represent EVENTSEL bitmask
Date: Tue, 25 Jun 2024 11:22:49 -0700
Message-Id: <20240625182256.291914-7-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

Different vendors may support different fields in the EVENTSEL MSR. For
example, Intel introduces the new umask2 and eq fields in the EVENTSEL MSR
starting with Perfmon version 6. However, a fixed mask,
X86_RAW_EVENT_MASK, is currently used to filter attr.config.

Introduce a new config_mask to record the actually supported EVENTSEL
bitmask. Only apply it to the existing code now. No functional change.

Reviewed-by: Andi Kleen
Co-developed-by: Dapeng Mi
Signed-off-by: Dapeng Mi
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 arch/x86/events/core.c       | 5 ++++-
 arch/x86/events/intel/core.c | 1 +
 arch/x86/events/perf_event.h | 7 +++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 848dbe9cbd0e..8ea1c988e19b 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -624,7 +624,7 @@ int x86_pmu_hw_config(struct perf_event *event)
 		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 
 	if (event->attr.type == event->pmu->type)
-		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+		event->hw.config |= x86_pmu_get_event_config(event);
 
 	if (event->attr.sample_period && x86_pmu.limit_period) {
 		s64 left = event->attr.sample_period;
@@ -2098,6 +2098,9 @@ static int __init init_hw_perf_events(void)
 	if (!x86_pmu.intel_ctrl)
 		x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
 
+	if (!x86_pmu.config_mask)
+		x86_pmu.config_mask = X86_RAW_EVENT_MASK;
+
 	perf_events_lapic_init();
 	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
 
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ab61b9ef677a..23e074fd25e1 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6144,6 +6144,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
 		pmu->cntr_mask64 = x86_pmu.cntr_mask64;
 		pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
 		pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
+		pmu->config_mask = X86_RAW_EVENT_MASK;
 		pmu->unconstrained = (struct event_constraint)
 				     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
 							0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 0d333bb9c8f4..a226565a9333 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -695,6 +695,7 @@ struct x86_hybrid_pmu {
 	union perf_capabilities	intel_cap;
 	u64			intel_ctrl;
 	u64			pebs_events_mask;
+	u64			config_mask;
 	union {
 			u64		cntr_mask64;
 			unsigned long	cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -790,6 +791,7 @@ struct x86_pmu {
 	int		(*rdpmc_index)(int index);
 	u64		(*event_map)(int);
 	int		max_events;
+	u64		config_mask;
 	union {
 			u64		cntr_mask64;
 			unsigned long	cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -1241,6 +1243,11 @@ static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu)
 	return find_last_bit(hybrid(pmu, fixed_cntr_mask), INTEL_PMC_MAX_FIXED) + 1;
 }
 
+static inline u64 x86_pmu_get_event_config(struct perf_event *event)
+{
+	return event->attr.config & hybrid(event->pmu, config_mask);
+}
+
 extern struct event_constraint emptyconstraint;
 
 extern struct event_constraint unconstrained;
-- 
2.35.1

From nobody Thu Dec 18 20:17:35 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang, Dapeng Mi
Subject: [PATCH V2 07/13] perf/x86/intel: Support PERFEVTSEL extension
Date: Tue, 25 Jun 2024 11:22:50 -0700
Message-Id: <20240625182256.291914-8-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

Two new fields (the unit mask2 and the equal flag) are added to the
IA32_PERFEVTSELx MSRs. They can be enumerated by CPUID.23H.0.EBX.

Update the config_mask in x86_pmu and x86_hybrid_pmu for the true layout
of PERFEVTSEL. Expose the new formats in sysfs if they are available.
The umask extension reuses the same format attr name "umask" as the
previous umask. Add umask2_show() to determine and display the correct
format for the current machine.

Reviewed-by: Andi Kleen
Co-developed-by: Dapeng Mi
Signed-off-by: Dapeng Mi
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 arch/x86/events/intel/core.c      | 69 +++++++++++++++++++++++++++++--
 arch/x86/include/asm/perf_event.h |  4 ++
 2 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 23e074fd25e1..9d50e1049e30 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4632,8 +4632,55 @@ PMU_FORMAT_ATTR(pc,	"config:19"	);
 PMU_FORMAT_ATTR(any,	"config:21"	); /* v3 + */
 PMU_FORMAT_ATTR(inv,	"config:23"	);
 PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
-PMU_FORMAT_ATTR(in_tx,  "config:32");
-PMU_FORMAT_ATTR(in_tx_cp, "config:33");
+PMU_FORMAT_ATTR(in_tx,	"config:32"	);
+PMU_FORMAT_ATTR(in_tx_cp, "config:33"	);
+PMU_FORMAT_ATTR(eq,	"config:36"	); /* v6 + */
+
+static ssize_t umask2_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *page)
+{
+	u64 mask = hybrid(dev_get_drvdata(dev), config_mask) & ARCH_PERFMON_EVENTSEL_UMASK2;
+
+	if (mask == ARCH_PERFMON_EVENTSEL_UMASK2)
+		return sprintf(page, "config:8-15,40-47\n");
+
+	/* Roll back to the old format if umask2 is not supported. */
+	return sprintf(page, "config:8-15\n");
+}
+
+static struct device_attribute format_attr_umask2  =
+		__ATTR(umask, 0444, umask2_show, NULL);
+
+static struct attribute *format_evtsel_ext_attrs[] = {
+	&format_attr_umask2.attr,
+	&format_attr_eq.attr,
+	NULL
+};
+
+static umode_t
+evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	u64 mask;
+
+	/*
+	 * The umask and umask2 have different formats but share the
+	 * same attr name. In update mode, the previous value of the
+	 * umask is unconditionally removed before is_visible. If
+	 * umask2 format is not enumerated, it's impossible to roll
+	 * back to the old format.
+	 * Does the check in umask2_show rather than is_visible.
+	 */
+	if (i == 0)
+		return attr->mode;
+
+	mask = hybrid(dev_get_drvdata(dev), config_mask);
+	if (i == 1)
+		return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;
+
+	return 0;
+}
 
 static struct attribute *intel_arch_formats_attr[] = {
 	&format_attr_event.attr,
@@ -4786,8 +4833,14 @@ static inline bool intel_pmu_broken_perf_cap(void)
 
 static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
 {
-	unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF);
-	unsigned int eax, ebx, ecx, edx;
+	unsigned int sub_bitmaps, eax, ebx, ecx, edx;
+
+	cpuid(ARCH_PERFMON_EXT_LEAF, &sub_bitmaps, &ebx, &ecx, &edx);
+
+	if (ebx & ARCH_PERFMON_EXT_UMASK2)
+		pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2;
+	if (ebx & ARCH_PERFMON_EXT_EQ)
+		pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ;
 
 	if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
 		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
@@ -5810,6 +5863,12 @@ static struct attribute_group group_format_extra_skl = {
 	.is_visible = exra_is_visible,
 };
 
+static struct attribute_group group_format_evtsel_ext = {
+	.name       = "format",
+	.attrs      = format_evtsel_ext_attrs,
+	.is_visible = evtsel_ext_is_visible,
+};
+
 static struct attribute_group group_default = {
 	.attrs      = intel_pmu_attrs,
 	.is_visible = default_is_visible,
@@ -5823,6 +5882,7 @@ static const struct attribute_group *attr_update[] = {
 	&group_caps_lbr,
 	&group_format_extra,
 	&group_format_extra_skl,
+	&group_format_evtsel_ext,
 	&group_default,
 	NULL,
 };
@@ -6042,6 +6102,7 @@ static const struct attribute_group *hybrid_attr_update[] = {
 	&group_caps_gen,
 	&group_caps_lbr,
 	&hybrid_group_format_extra,
+	&group_format_evtsel_ext,
 	&group_default,
 	&hybrid_group_cpus,
 	NULL,
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 400c909b8658..91b73571412f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -32,6 +32,8 @@
 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
 #define ARCH_PERFMON_EVENTSEL_BR_CNTR			(1ULL << 35)
+#define ARCH_PERFMON_EVENTSEL_EQ			(1ULL << 36)
+#define ARCH_PERFMON_EVENTSEL_UMASK2			(0xFFULL << 40)
 
 #define INTEL_FIXED_BITS_MASK				0xFULL
 #define INTEL_FIXED_BITS_STRIDE			4
@@ -185,6 +187,8 @@ union cpuid10_edx {
  * detection/enumeration details:
  */
 #define ARCH_PERFMON_EXT_LEAF			0x00000023
+#define ARCH_PERFMON_EXT_UMASK2			0x1
+#define ARCH_PERFMON_EXT_EQ			0x2
 #define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT	0x1
 #define ARCH_PERFMON_NUM_COUNTER_LEAF		0x1
 
-- 
2.35.1

From nobody Thu Dec 18 20:17:35 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V2 08/13] perf/x86/intel: Support Perfmon MSRs aliasing
Date: Tue, 25 Jun 2024 11:22:51 -0700
Message-Id: <20240625182256.291914-9-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

The architectural performance monitoring V6 supports a new range of
counter MSRs in the 19xxH address range. It includes all the GP counter
MSRs, the GP control MSRs, and the fixed counter MSRs. The step between
sibling counters is 4. Add intel_pmu_addr_offset() to calculate the
correct offset.

Add fixedctr to struct x86_pmu to store the address of fixed counter 0;
the addresses of the remaining fixed counters can be calculated from it.
The MSR address of the fixed counter control is not changed.
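As a quick sanity check, the address arithmetic this patch introduces can
be exercised in userspace (a minimal sketch, not kernel code; the
MSR_IA32_PMC_V6_* constants come from the diff below, and v6_addr()
mirrors intel_pmu_addr_offset()):

    #include <stdio.h>

    #define MSR_IA32_PMC_V6_GP0_CTR   0x1900
    #define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
    #define MSR_IA32_PMC_V6_FX0_CTR   0x1980
    #define MSR_IA32_PMC_V6_STEP      4

    /* Mirrors intel_pmu_addr_offset(): offset = STEP * index. */
    static unsigned int v6_addr(unsigned int base, int index)
    {
        return base + MSR_IA32_PMC_V6_STEP * index;
    }

    int main(void)
    {
        printf("GP counter 2:    %#x\n", v6_addr(MSR_IA32_PMC_V6_GP0_CTR, 2));   /* 0x1908 */
        printf("GP evtsel 2:     %#x\n", v6_addr(MSR_IA32_PMC_V6_GP0_CFG_A, 2)); /* 0x1909 */
        printf("fixed counter 1: %#x\n", v6_addr(MSR_IA32_PMC_V6_FX0_CTR, 1));   /* 0x1984 */
        return 0;
    }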
Reviewed-by: Andi Kleen
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 arch/x86/events/core.c           |  7 +++----
 arch/x86/events/intel/core.c     | 17 ++++++++++++++++-
 arch/x86/events/perf_event.h     |  7 +++++++
 arch/x86/include/asm/msr-index.h |  6 ++++++
 4 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8ea1c988e19b..975b0f8a0b00 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1236,8 +1236,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 		fallthrough;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
-				  (idx - INTEL_PMC_IDX_FIXED);
+		hwc->event_base = x86_pmu_fixed_ctr_addr(idx - INTEL_PMC_IDX_FIXED);
 		hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
 					INTEL_PMC_FIXED_RDPMC_BASE;
 		break;
@@ -1573,7 +1572,7 @@ void perf_event_print_debug(void)
 		for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
 			if (fixed_counter_disabled(idx, cpuc->pmu))
 				continue;
-			rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+			rdmsrl(x86_pmu_fixed_ctr_addr(idx), pmc_count);
 
 			pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
 				cpu, idx, pmc_count);
@@ -2483,7 +2482,7 @@ void perf_clear_dirty_counters(void)
 			if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
 				continue;
 
-			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
+			wrmsrl(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0);
 		} else {
 			wrmsrl(x86_pmu_event_addr(i), 0);
 		}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9d50e1049e30..4810ff269b9a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2953,7 +2953,7 @@ static void intel_pmu_reset(void)
 	for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
 		if (fixed_counter_disabled(idx, cpuc->pmu))
 			continue;
-		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+		wrmsrl_safe(x86_pmu_fixed_ctr_addr(idx), 0ull);
 	}
 
 	if (ds)
@@ -5188,6 +5188,7 @@ static __initconst const struct x86_pmu core_pmu = {
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.fixedctr		= MSR_ARCH_PERFMON_FIXED_CTR0,
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
@@ -5241,6 +5242,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.fixedctr		= MSR_ARCH_PERFMON_FIXED_CTR0,
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
@@ -6176,6 +6178,11 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs)
 	}
 }
 
+static inline int intel_pmu_addr_offset(int index, bool eventsel)
+{
+	return MSR_IA32_PMC_V6_STEP * index;
+}
+
 static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
 	{ hybrid_small, "cpu_atom" },
 	{ hybrid_big, "cpu_core" },
@@ -7150,6 +7157,14 @@ __init int intel_pmu_init(void)
 		pr_cont("full-width counters, ");
 	}
 
+	/* Support V6+ MSR Aliasing */
+	if (x86_pmu.version >= 6) {
+		x86_pmu.perfctr = MSR_IA32_PMC_V6_GP0_CTR;
+		x86_pmu.eventsel = MSR_IA32_PMC_V6_GP0_CFG_A;
+		x86_pmu.fixedctr = MSR_IA32_PMC_V6_FX0_CTR;
+		x86_pmu.addr_offset = intel_pmu_addr_offset;
+	}
+
 	if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics)
 		x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a226565a9333..8e3f2644a1a3 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -787,6 +787,7 @@ struct x86_pmu {
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
+	unsigned	fixedctr;
 	int		(*addr_offset)(int index, bool eventsel);
 	int		(*rdpmc_index)(int index);
 	u64		(*event_map)(int);
@@ -1144,6 +1145,12 @@ static inline unsigned int x86_pmu_event_addr(int index)
 				  x86_pmu.addr_offset(index, false) : index);
 }
 
+static inline unsigned int x86_pmu_fixed_ctr_addr(int index)
+{
+	return x86_pmu.fixedctr + (x86_pmu.addr_offset ?
+				   x86_pmu.addr_offset(index, false) : index);
+}
+
 static inline int x86_pmu_rdpmc_index(int index)
 {
 	return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e022e6eb766c..048081b226d7 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -566,6 +566,12 @@
 #define MSR_RELOAD_PMC0			0x000014c1
 #define MSR_RELOAD_FIXED_CTR0		0x00001309
 
+/* V6 PMON MSR range */
+#define MSR_IA32_PMC_V6_GP0_CTR		0x1900
+#define MSR_IA32_PMC_V6_GP0_CFG_A	0x1901
+#define MSR_IA32_PMC_V6_FX0_CTR		0x1980
+#define MSR_IA32_PMC_V6_STEP		4
+
 /* KeyID partitioning between MKTME and TDX */
 #define MSR_IA32_MKTME_KEYID_PARTITIONING	0x00000087
 
-- 
2.35.1

From nobody Thu Dec 18 20:17:35 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang, Sandipan Das, Ravi Bangoria, silviazhao, CodyYao-oc
Subject: [PATCH V2 09/13] perf/x86: Extend event update interface
Date: Tue, 25 Jun 2024 11:22:52 -0700
Message-Id: <20240625182256.291914-10-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

The current event update interface directly reads the values from the
counter, but the values may not be the accurate ones users require. For
example, the sample read feature wants the counter value of the member
events when the leader event overflows. But with the current
implementation, the read (event update) actually happens in the NMI
handler, and there may be a small gap between the overflow and the NMI
handler. The new Intel PEBS counters snapshotting feature can provide
the accurate counter value at the overflow.

The event update interface has to be updated to apply the given accurate
values. Pass the accurate values via the event update interface. If a
value is not available, still directly read the counter.

Use u64 * rather than u64 as the new parameter, because 0 might be a
valid rdpmc() value, so !val cannot be used to distinguish between there
being an argument and there not being one. Also, in some cases, e.g.,
intel_update_topdown_event(), more than one counter/register is read.
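The read-or-use-snapshot pattern boils down to the following (a minimal
userspace sketch, not the kernel code itself; read_counter_hw() is a
hypothetical stand-in for rdpmcl()):

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for rdpmcl(): reads the live counter. */
    static uint64_t read_counter_hw(void)
    {
        return 12345;   /* pretend hardware value */
    }

    /*
     * NULL val -> read the counter directly (old behavior).
     * non-NULL -> use the caller-provided snapshot, even if it is 0,
     *             which is why a u64 * is needed rather than a u64.
     */
    static uint64_t event_update(const uint64_t *val)
    {
        return val ? *val : read_counter_hw();
    }

    int main(void)
    {
        uint64_t snapshot = 0;  /* 0 is a valid counter value */

        printf("live read: %llu\n", (unsigned long long)event_update(NULL));
        printf("snapshot:  %llu\n", (unsigned long long)event_update(&snapshot));
        return 0;
    }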
Reviewed-by: Andi Kleen
Signed-off-by: Kan Liang
Cc: Sandipan Das
Cc: Ravi Bangoria
Cc: silviazhao
Cc: CodyYao-oc
Reviewed-by: Ian Rogers
---
 arch/x86/events/amd/core.c     |  2 +-
 arch/x86/events/core.c         | 13 ++++++-----
 arch/x86/events/intel/core.c   | 40 +++++++++++++++++++---------------
 arch/x86/events/intel/p4.c     |  2 +-
 arch/x86/events/perf_event.h   |  4 ++--
 arch/x86/events/zhaoxin/core.c |  2 +-
 6 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 42a9f97a9aed..7e017474ddb5 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -984,7 +984,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)

 		event = cpuc->events[idx];
 		hwc = &event->hw;
-		x86_perf_event_update(event);
+		x86_perf_event_update(event, NULL);
 		mask = BIT_ULL(idx);

 		if (!(status & mask))
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 975b0f8a0b00..38754136ef82 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -112,7 +112,7 @@ u64 __read_mostly hw_cache_extra_regs
 * Can only be executed on the CPU where the event is active.
 * Returns the delta events processed.
 */
-u64 x86_perf_event_update(struct perf_event *event)
+u64 x86_perf_event_update(struct perf_event *event, u64 *val)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int shift = 64 - x86_pmu.cntval_bits;
@@ -131,7 +131,10 @@ u64 x86_perf_event_update(struct perf_event *event)
 	 */
 	prev_raw_count = local64_read(&hwc->prev_count);
 	do {
-		rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+		if (!val)
+			rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+		else
+			new_raw_count = *val;
 	} while (!local64_try_cmpxchg(&hwc->prev_count,
 				      &prev_raw_count, new_raw_count));

@@ -1598,7 +1601,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
 		 * Drain the remaining delta count out of a event
 		 * that we are disabling:
 		 */
-		static_call(x86_pmu_update)(event);
+		static_call(x86_pmu_update)(event, NULL);
 		hwc->state |= PERF_HES_UPTODATE;
 	}
 }
@@ -1689,7 +1692,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)

 		event = cpuc->events[idx];

-		val = static_call(x86_pmu_update)(event);
+		val = static_call(x86_pmu_update)(event, NULL);
 		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 			continue;

@@ -2036,7 +2039,7 @@ static void x86_pmu_static_call_update(void)

 static void _x86_pmu_read(struct perf_event *event)
 {
-	static_call(x86_pmu_update)(event);
+	static_call(x86_pmu_update)(event, NULL);
 }

 void x86_pmu_show_pmu_cap(struct pmu *pmu)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 4810ff269b9a..8e9f2813eeea 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2418,7 +2418,7 @@ static void intel_pmu_nhm_workaround(void)
 	for (i = 0; i < 4; i++) {
 		event = cpuc->events[i];
 		if (event)
-			static_call(x86_pmu_update)(event);
+			static_call(x86_pmu_update)(event, NULL);
 	}

 	for (i = 0; i < 4; i++) {
@@ -2710,7 +2710,7 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
 * modify by a NMI. PMU has to be disabled before calling this function.
 */

-static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
+static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct perf_event *other;
@@ -2718,13 +2718,18 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
 	bool reset = true;
 	int idx;

-	/* read Fixed counter 3 */
-	rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
-	if (!slots)
-		return 0;
+	if (!val) {
+		/* read Fixed counter 3 */
+		rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
+		if (!slots)
+			return 0;

-	/* read PERF_METRICS */
-	rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+		/* read PERF_METRICS */
+		rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+	} else {
+		slots = val[0];
+		metrics = val[1];
+	}

 	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
 		if (!is_topdown_idx(idx))
@@ -2767,10 +2772,11 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
 	return slots;
 }

-static u64 icl_update_topdown_event(struct perf_event *event)
+static u64 icl_update_topdown_event(struct perf_event *event, u64 *val)
 {
 	return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
-					  x86_pmu.num_topdown_events - 1);
+					  x86_pmu.num_topdown_events - 1,
+					  val);
 }

 DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
@@ -2785,7 +2791,7 @@ static void intel_pmu_read_topdown_event(struct perf_event *event)
 		return;

 	perf_pmu_disable(event->pmu);
-	static_call(intel_pmu_update_topdown_event)(event);
+	static_call(intel_pmu_update_topdown_event)(event, NULL);
 	perf_pmu_enable(event->pmu);
 }

@@ -2796,7 +2802,7 @@ static void intel_pmu_read_event(struct perf_event *event)
 	else if (is_topdown_count(event))
 		intel_pmu_read_topdown_event(event);
 	else
-		x86_perf_event_update(event);
+		x86_perf_event_update(event, NULL);
 }

 static void intel_pmu_enable_fixed(struct perf_event *event)
@@ -2899,7 +2905,7 @@ static void intel_pmu_add_event(struct perf_event *event)
 */
 int intel_pmu_save_and_restart(struct perf_event *event)
 {
-	static_call(x86_pmu_update)(event);
+	static_call(x86_pmu_update)(event, NULL);
 	/*
 	 * For a checkpointed counter always reset back to 0.
	 * This avoids a situation where the counter overflows, aborts the
@@ -2922,12 +2928,12 @@ static int intel_pmu_set_period(struct perf_event *event)
 	return x86_perf_event_set_period(event);
 }

-static u64 intel_pmu_update(struct perf_event *event)
+static u64 intel_pmu_update(struct perf_event *event, u64 *val)
 {
 	if (unlikely(is_topdown_count(event)))
-		return static_call(intel_pmu_update_topdown_event)(event);
+		return static_call(intel_pmu_update_topdown_event)(event, val);

-	return x86_perf_event_update(event);
+	return x86_perf_event_update(event, val);
 }

 static void intel_pmu_reset(void)
@@ -3091,7 +3097,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 	 */
 	if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
 		handled++;
-		static_call(intel_pmu_update_topdown_event)(NULL);
+		static_call(intel_pmu_update_topdown_event)(NULL, NULL);
 	}

 	/*
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index 844bc4fc4724..3177be0dedd1 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -1058,7 +1058,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		/* it might be unflagged overflow */
 		overflow = p4_pmu_clear_cccr_ovf(hwc);

-		val = x86_perf_event_update(event);
+		val = x86_perf_event_update(event, NULL);
 		if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
 			continue;

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8e3f2644a1a3..f6b57f0b2787 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -782,7 +782,7 @@ struct x86_pmu {
 	void		(*del)(struct perf_event *);
 	void		(*read)(struct perf_event *event);
 	int		(*set_period)(struct perf_event *event);
-	u64		(*update)(struct perf_event *event);
+	u64		(*update)(struct perf_event *event, u64 *val);
 	int		(*hw_config)(struct perf_event *event);
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
@@ -1131,7 +1131,7 @@ extern u64 __read_mostly hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];

-u64 x86_perf_event_update(struct perf_event *event);
+u64 x86_perf_event_update(struct perf_event *event, u64 *cntr);

 static inline unsigned int x86_pmu_config_addr(int index)
 {
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
index 2fd9b0cf9a5e..5fe3a9eed650 100644
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -391,7 +391,7 @@ static int zhaoxin_pmu_handle_irq(struct pt_regs *regs)
 		if (!test_bit(bit, cpuc->active_mask))
 			continue;

-		x86_perf_event_update(event);
+		x86_perf_event_update(event, NULL);
 		perf_sample_data_init(&data, 0, event->hw.last_period);

 		if (!x86_perf_event_set_period(event))
-- 
2.35.1
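To make the two-value convention above concrete, a hedged sketch of how a
caller might feed intel_update_topdown_event() (the variable names are
invented for illustration): val[0] carries the SLOTS count and val[1] the
PERF_METRICS snapshot, mirroring the two rdpmc reads the function performs
when no values are supplied:

	u64 vals[2] = { slots_from_record, metrics_from_record };

	intel_update_topdown_event(event, metric_end, vals); /* captured values */
	intel_update_topdown_event(event, metric_end, NULL); /* fall back to rdpmc */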
From nobody Thu Dec 18 20:17:35 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V2 10/13] perf: Extend perf_output_read
Date: Tue, 25 Jun 2024 11:22:53 -0700
Message-Id: <20240625182256.291914-11-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

The event may have been updated in the PMU-specific implementation,
e.g., Intel PEBS counters snapshotting. The common code should not
read the counter again and overwrite that value.

The PERF_SAMPLE_READ bit in data->sample_flags can be used to detect
whether the PMU-specific value is available. If it is, avoid the
pmu->read() in the common code.
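The effect of the new flag is easiest to see in a reduced sketch of the
group-read path, condensed from the diff below (error handling and the
value array omitted):

	/*
	 * When @read is false the PMU-specific code already filled in the
	 * counts (e.g. from a PEBS record), so pmu->read() must not run.
	 */
	if ((leader != event) && read &&
	    (leader->state == PERF_EVENT_STATE_ACTIVE))
		leader->pmu->read(leader);

	for_each_sibling_event(sub, leader) {
		if ((sub != event) && read &&
		    (sub->state == PERF_EVENT_STATE_ACTIVE))
			sub->pmu->read(sub);
	}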
Reviewed-by: Andi Kleen
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 kernel/events/core.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8f908f077935..733e507948e6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7243,7 +7243,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,

 static void perf_output_read_group(struct perf_output_handle *handle,
 				   struct perf_event *event,
-				   u64 enabled, u64 running)
+				   u64 enabled, u64 running, bool read)
 {
 	struct perf_event *leader = event->group_leader, *sub;
 	u64 read_format = event->attr.read_format;
@@ -7265,7 +7265,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = running;

-	if ((leader != event) &&
+	if ((leader != event) && read &&
 	    (leader->state == PERF_EVENT_STATE_ACTIVE))
 		leader->pmu->read(leader);

@@ -7280,7 +7280,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	for_each_sibling_event(sub, leader) {
 		n = 0;

-		if ((sub != event) &&
+		if ((sub != event) && read &&
 		    (sub->state == PERF_EVENT_STATE_ACTIVE))
 			sub->pmu->read(sub);

@@ -7307,7 +7307,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 * on another CPU, from interrupt/NMI context.
 */
 static void perf_output_read(struct perf_output_handle *handle,
-			     struct perf_event *event)
+			     struct perf_event *event,
+			     bool read)
 {
 	u64 enabled = 0, running = 0, now;
 	u64 read_format = event->attr.read_format;
@@ -7325,7 +7326,7 @@ static void perf_output_read(struct perf_output_handle *handle,
 		calc_timer_values(event, &now, &enabled, &running);

 	if (event->attr.read_format & PERF_FORMAT_GROUP)
-		perf_output_read_group(handle, event, enabled, running);
+		perf_output_read_group(handle, event, enabled, running, read);
 	else
 		perf_output_read_one(handle, event, enabled, running);
 }
@@ -7367,7 +7368,7 @@ void perf_output_sample(struct perf_output_handle *handle,
 		perf_output_put(handle, data->period);

 	if (sample_type & PERF_SAMPLE_READ)
-		perf_output_read(handle, event);
+		perf_output_read(handle, event, !(data->sample_flags & PERF_SAMPLE_READ));

 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
@@ -7968,7 +7969,7 @@ perf_event_read_event(struct perf_event *event,
 		return;

 	perf_output_put(&handle, read_event);
-	perf_output_read(&handle, event);
+	perf_output_read(&handle, event, true);
 	perf_event__output_id_sample(event, &handle, &sample);

 	perf_output_end(&handle);
-- 
2.35.1
From nobody Thu Dec 18 20:17:35 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V2 11/13] perf/x86/intel: Move PEBS event update after the
 sample output
Date: Tue, 25 Jun 2024 11:22:54 -0700
Message-Id: <20240625182256.291914-12-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

In drain_pebs(), besides outputting the sample data, perf needs to
update the PEBS event (e.g., prev_count, event->count) as well. Both
operations may invoke the perf_event_update(), but the sequence of the
two operations doesn't matter for now, because the updated event value
is read directly from the counter via rdpmc and the counter has
stopped in drain_pebs(). But if the updated event value comes from
different places (PEBS record vs. counter), the sequence does matter,
as the sketch below illustrates.
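The required ordering can be sketched as follows (the helper names are
hypothetical, shown only to separate the per-record updates from the
final reconciliation with the live counter):

	/* Per-record values: update and emit sample by sample ... */
	for_each_pebs_record(at, base, top) {
		update_event_from_record(event, at);	/* value from the record */
		output_sample(event, at);
	}
	/* ... then reconcile once with the current counter value. */
	update_event_from_counter(event);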
For example, with the new Intel PEBS counters snapshotting feature, the
large PEBS can be enabled for the sample read, since the counter values
for each sample are recorded in the PEBS records. The current perf does
the PEBS event update first, which updates the event for all the
records at once. The later sample-read output then cannot dump the
value for each individual sample, since prev_count is already the
newest value from the current counter.

Move the PEBS event update after the sample output. Each sample-read
output updates and outputs the value only for that sample (according
to the value in the PEBS record). Once all samples are output, update
the PEBS event again from the current counter, and set the remaining
period.

A failing intel_pmu_save_and_restart() only happens when
!hwc->event_base or when the remaining period left > 0. The
!hwc->event_base case is impossible for a PEBS event, which is only
available on GP and fixed counters. __intel_pmu_pebs_event() only
processes overflowed samples, so left should always be <= 0. It's safe
to ignore the return value of the intel_pmu_save_and_restart() check.

Reviewed-by: Andi Kleen
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 arch/x86/events/intel/ds.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ce7e98409f29..fb04ef307f7b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2158,17 +2158,6 @@ __intel_pmu_pebs_event(struct perf_event *event,
 	void *at = get_next_pebs_record_by_bit(base, top, bit);
 	static struct pt_regs dummy_iregs;

-	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
-		/*
-		 * Now, auto-reload is only enabled in fixed period mode.
-		 * The reload value is always hwc->sample_period.
-		 * May need to change it, if auto-reload is enabled in
-		 * freq mode later.
-		 */
-		intel_pmu_save_and_restart_reload(event, count);
-	} else if (!intel_pmu_save_and_restart(event))
-		return;
-
 	if (!iregs)
 		iregs = &dummy_iregs;

@@ -2197,6 +2186,17 @@ __intel_pmu_pebs_event(struct perf_event *event,
 		if (perf_event_overflow(event, data, regs))
 			x86_pmu_stop(event, 0);
 	}
+
+	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+		/*
+		 * Now, auto-reload is only enabled in fixed period mode.
+		 * The reload value is always hwc->sample_period.
+		 * May need to change it, if auto-reload is enabled in
+		 * freq mode later.
+		 */
+		intel_pmu_save_and_restart_reload(event, count);
+	} else
+		intel_pmu_save_and_restart(event);
 }

 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
-- 
2.35.1

From nobody Thu Dec 18 20:17:35 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V2 12/13] perf/x86/intel: Support PEBS counters snapshotting
Date: Tue, 25 Jun 2024 11:22:55 -0700
Message-Id: <20240625182256.291914-13-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

Counters snapshotting is a new adaptive PEBS extension that can
capture programmable counters, fixed-function counters, and
performance metrics in a PEBS record. The feature is available in PEBS
format V6.

The target counters can be configured in the new fields of
MSR_PEBS_CFG. The PEBS HW then generates a bit mask of counters (the
Counters Group Header) followed by the content of all the requested
counters in each PEBS record.

The current Linux perf sample read feature intends to read the
counters of the other member events when the leader event overflows.
But the current read happens in the NMI handler, which may have a
small gap from the overflow. Use the counters snapshotting feature for
the sample read instead.

Add a new PEBS_CNTR flag to indicate a sample read group that utilizes
the counters snapshotting feature. When the group is scheduled, the
PEBS configuration can be updated accordingly.

Reviewed-by: Andi Kleen
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 arch/x86/events/intel/core.c       |  33 ++++++++-
 arch/x86/events/intel/ds.c         | 114 +++++++++++++++++++++++++++--
 arch/x86/events/perf_event.h       |   3 +
 arch/x86/events/perf_event_flags.h |   2 +-
 arch/x86/include/asm/perf_event.h  |  15 ++++
 5 files changed, 157 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 8e9f2813eeea..33e189710073 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4058,6 +4058,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
 	}

+	if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
+	    (x86_pmu.intel_cap.pebs_format >= 6)) {
+		struct perf_event *leader = event->group_leader;
+
+		if (is_slots_event(leader))
+			leader = list_next_entry(leader, sibling_list);
+
+		if (leader->attr.precise_ip) {
+			leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+			event->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+		}
+	}
+
 	if ((event->attr.type == PERF_TYPE_HARDWARE) ||
 	    (event->attr.type == PERF_TYPE_HW_CACHE))
 		return 0;
@@ -4161,6 +4174,24 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	return 0;
 }

+static int intel_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+	struct perf_event *event;
+	int ret = x86_schedule_events(cpuc, n, assign);
+
+	if (ret)
+		return ret;
+
+	if (cpuc->is_fake)
+		return ret;
+
+	event = cpuc->event_list[n - 1];
+	if (event && (event->hw.flags & PERF_X86_EVENT_PEBS_CNTR))
+		intel_pmu_pebs_update_cfg(cpuc, n, assign);
+
+	return 0;
+}
+
 /*
 * Currently, the only caller of this function is the atomic_switch_perf_msrs().
 * The host perf context helps to prepare the values of the real hardware for
@@ -5245,7 +5276,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.set_period		= intel_pmu_set_period,
 	.update			= intel_pmu_update,
 	.hw_config		= intel_pmu_hw_config,
-	.schedule_events	= x86_schedule_events,
+	.schedule_events	= intel_pmu_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.fixedctr		= MSR_ARCH_PERFMON_FIXED_CTR0,
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index fb04ef307f7b..3cf547590df2 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1281,10 +1281,61 @@ static void adaptive_pebs_record_size_update(void)
 		sz += sizeof(struct pebs_xmm);
 	if (pebs_data_cfg & PEBS_DATACFG_LBRS)
 		sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
+	if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) {
+		sz += sizeof(struct pebs_cntr_header);
+
+		/* Metrics base and Metrics Data */
+		if (pebs_data_cfg & PEBS_DATACFG_METRICS)
+			sz += 2 * sizeof(u64);
+
+		if (pebs_data_cfg & PEBS_DATACFG_CNTR) {
+			sz += hweight64((pebs_data_cfg >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK)
+			      * sizeof(u64);
+			sz += hweight64((pebs_data_cfg >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK)
+			      * sizeof(u64);
+		}
+	}

 	cpuc->pebs_record_size = sz;
 }

+static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
+					int idx, u64 *pebs_data_cfg)
+{
+	if (is_metric_event(event)) {
+		*pebs_data_cfg |= PEBS_DATACFG_METRICS;
+		return;
+	}
+
+	*pebs_data_cfg |= PEBS_DATACFG_CNTR;
+
+	if (idx >= INTEL_PMC_IDX_FIXED) {
+		*pebs_data_cfg |= ((1ULL << (idx - INTEL_PMC_IDX_FIXED)) & PEBS_DATACFG_FIX_MASK)
+				  << PEBS_DATACFG_FIX_SHIFT;
+	} else {
+		*pebs_data_cfg |= ((1ULL << idx) & PEBS_DATACFG_CNTR_MASK)
+				  << PEBS_DATACFG_CNTR_SHIFT;
+	}
+}
+
+void intel_pmu_pebs_update_cfg(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+	struct perf_event *leader, *event;
+	u64 pebs_data_cfg = 0;
+	int i = n - 1;
+
+	leader = cpuc->event_list[i]->group_leader;
+	for (; i >= 0; i--) {
+		event = cpuc->event_list[i];
+		if (leader != event->group_leader)
+			break;
+		__intel_pmu_pebs_update_cfg(event, assign[i], &pebs_data_cfg);
+	}
+
+	if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
+		cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
+}
+
 #define PERF_PEBS_MEMINFO_TYPE	(PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
				 PERF_SAMPLE_PHYS_ADDR |		     \
				 PERF_SAMPLE_WEIGHT_TYPE |		     \
@@ -2024,6 +2075,40 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 		}
 	}

+	if (format_size & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) {
+		struct pebs_cntr_header *cntr = next_record;
+		int bit;
+
+		next_record += sizeof(struct pebs_cntr_header);
+
+		for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) {
+			x86_perf_event_update(cpuc->events[bit], (u64 *)next_record);
+			next_record += sizeof(u64);
+		}
+
+		for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) {
+			/* The slots event will be handled with perf_metric later */
+			if ((cntr->metrics == INTEL_CNTR_METRICS) &&
+			    (INTEL_PMC_IDX_FIXED_SLOTS == bit + INTEL_PMC_IDX_FIXED)) {
+				next_record += sizeof(u64);
+				continue;
+			}
+			x86_perf_event_update(cpuc->events[bit + INTEL_PMC_IDX_FIXED], (u64 *)next_record);
+			next_record += sizeof(u64);
+		}
+
+		/* HW will reload the value right after the overflow. */
+		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+			local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period);
+
+		if (cntr->metrics == INTEL_CNTR_METRICS) {
+			static_call(intel_pmu_update_topdown_event)
+				   (event->group_leader, (u64 *)next_record);
+			next_record += 2 * sizeof(u64);
+		}
+		data->sample_flags |= PERF_SAMPLE_READ;
+	}
+
 	WARN_ONCE(next_record != __pebs + (format_size >> 48),
 		  "PEBS record size %llu, expected %llu, config %llx\n",
 		  format_size >> 48,
@@ -2188,13 +2273,22 @@ __intel_pmu_pebs_event(struct perf_event *event,
 	}

 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
-		/*
-		 * Now, auto-reload is only enabled in fixed period mode.
-		 * The reload value is always hwc->sample_period.
-		 * May need to change it, if auto-reload is enabled in
-		 * freq mode later.
-		 */
-		intel_pmu_save_and_restart_reload(event, count);
+		if (event->hw.flags & PERF_X86_EVENT_PEBS_CNTR) {
+			/*
+			 * The value of each sample has been updated when setup
+			 * the corresponding sample data. But there may be a small
+			 * gap between the last overflow and the drain_pebs().
+			 */
+			intel_pmu_save_and_restart_reload(event, 0);
+		} else {
+			/*
+			 * Now, auto-reload is only enabled in fixed period mode.
+			 * The reload value is always hwc->sample_period.
+			 * May need to change it, if auto-reload is enabled in
+			 * freq mode later.
+			 */
+			intel_pmu_save_and_restart_reload(event, count);
+		}
 	} else
 		intel_pmu_save_and_restart(event);
 }
@@ -2486,6 +2580,10 @@ void __init intel_ds_init(void)
 			x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
 			break;

+		case 6:
+			if (x86_pmu.intel_cap.pebs_baseline)
+				x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
+			fallthrough;
 		case 5:
 			x86_pmu.pebs_ept = 1;
 			fallthrough;
@@ -2510,7 +2608,7 @@ void __init intel_ds_init(void)
 						  PERF_SAMPLE_REGS_USER |
 						  PERF_SAMPLE_REGS_INTR);
 			}
-			pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
+			pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual);

 			if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
 				pr_cont("PEBS-via-PT, ");
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f6b57f0b2787..3d64ed240e91 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1132,6 +1132,7 @@ extern u64 __read_mostly hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];

 u64 x86_perf_event_update(struct perf_event *event, u64 *cntr);
+DECLARE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);

 static inline unsigned int x86_pmu_config_addr(int index)
 {
@@ -1626,6 +1627,8 @@ void intel_pmu_pebs_disable_all(void);

 void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);

+void intel_pmu_pebs_update_cfg(struct cpu_hw_events *cpuc, int n, int *assign);
+
 void intel_pmu_auto_reload_read(struct perf_event *event);

 void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 6c977c19f2cd..1d9e385649b5 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -9,7 +9,7 @@ PERF_ARCH(PEBS_LD_HSW,	0x00008) /* haswell style datala, load */
 PERF_ARCH(PEBS_NA_HSW,	0x00010) /* haswell style datala, unknown */
 PERF_ARCH(EXCL,		0x00020) /* HT exclusivity on counter */
 PERF_ARCH(DYNAMIC,	0x00040) /* dynamic alloc'd constraint */
-			/* 0x00080 */
+PERF_ARCH(PEBS_CNTR,	0x00080) /* PEBS counters snapshot */
 PERF_ARCH(EXCL_ACCT,	0x00100) /* accounted EXCL event */
 PERF_ARCH(AUTO_RELOAD,	0x00200) /* use PEBS
 auto-reload */
 PERF_ARCH(LARGE_PEBS,	0x00400) /* use large PEBS */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 91b73571412f..709746cd7c19 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -140,6 +140,12 @@
 #define PEBS_DATACFG_XMMS	BIT_ULL(2)
 #define PEBS_DATACFG_LBRS	BIT_ULL(3)
 #define PEBS_DATACFG_LBR_SHIFT	24
+#define PEBS_DATACFG_CNTR	BIT_ULL(4)
+#define PEBS_DATACFG_CNTR_SHIFT	32
+#define PEBS_DATACFG_CNTR_MASK	GENMASK_ULL(15, 0)
+#define PEBS_DATACFG_FIX_SHIFT	48
+#define PEBS_DATACFG_FIX_MASK	GENMASK_ULL(7, 0)
+#define PEBS_DATACFG_METRICS	BIT_ULL(5)

 /* Steal the highest bit of pebs_data_cfg for SW usage */
 #define PEBS_UPDATE_DS_SW	BIT_ULL(63)
@@ -444,6 +450,15 @@ struct pebs_xmm {
 	u64 xmm[16*2];	/* two entries for each register */
 };

+struct pebs_cntr_header {
+	u32 cntr;
+	u32 fixed;
+	u32 metrics;
+	u32 reserved;
+};
+
+#define INTEL_CNTR_METRICS	0x3
+
 /*
 * AMD Extended Performance Monitoring and Debug cpuid feature detection
 */
-- 
2.35.1
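As a sanity check on the new field layout, a hedged sketch of how a
GP-counter mask and a fixed-counter mask pack into pebs_data_cfg, mirroring
what __intel_pmu_pebs_update_cfg() above builds bit by bit (the requested
counter masks are made-up example values):

	u64 cfg = PEBS_DATACFG_CNTR;

	/* request GP counters 0 and 1, and fixed counter 0 */
	cfg |= (0x3ULL & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT;
	cfg |= (0x1ULL & PEBS_DATACFG_FIX_MASK) << PEBS_DATACFG_FIX_SHIFT;

The same masks come back from hardware in the cntr and fixed words of
struct pebs_cntr_header, which is how the drain path above knows how many
u64 values follow in each record.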
From nobody Thu Dec 18 20:17:35 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V2 13/13] perf/x86/intel: Support RDPMC metrics clear mode
Date: Tue, 25 Jun 2024 11:22:56 -0700
Message-Id: <20240625182256.291914-14-kan.liang@linux.intel.com>
In-Reply-To: <20240625182256.291914-1-kan.liang@linux.intel.com>
References: <20240625182256.291914-1-kan.liang@linux.intel.com>

From: Kan Liang

The new RDPMC enhancement, metrics clear mode, clears the
PERF_METRICS-related resources as well as the fixed-function
performance monitoring counter 3 after a read is performed. It is
available for ring 3. The feature is enumerated by
IA32_PERF_CAPABILITIES.RDPMC_CLEAR_METRICS[bit 19]. To enable the
feature, IA32_FIXED_CTR_CTRL.METRICS_CLEAR_EN[bit 14] must be set.

Two ways were considered to enable the feature:
- Expose a knob in sysfs globally. One user may affect the measurement
  of other users when changing the knob. This solution was dropped.
- Introduce a new event format, metrics_clear, for the slots event to
  disable/enable the feature only for the current process. Users can
  utilize the feature as needed.
The latter solution is implemented in this patch.

The current KVM doesn't support the perf metrics yet. For
virtualization, the feature can be enabled later separately.

Update the perf metrics documentation accordingly.

Suggested-by: Andi Kleen
Reviewed-by: Andi Kleen
Signed-off-by: Kan Liang
Reviewed-by: Ian Rogers
---
 arch/x86/events/intel/core.c         | 20 +++++++++++++++++++-
 arch/x86/events/perf_event.h         |  1 +
 arch/x86/include/asm/perf_event.h    |  4 ++++
 tools/perf/Documentation/topdown.txt |  9 +++++++--
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 33e189710073..5868ba8e2167 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2822,6 +2822,9 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 			return;

 		idx = INTEL_PMC_IDX_FIXED_SLOTS;
+
+		if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR)
+			bits |= INTEL_FIXED_3_METRICS_CLEAR;
 	}

 	intel_set_masks(event, idx);
@@ -4086,7 +4089,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	 * is used in a metrics group, it too cannot support sampling.
 	 */
 	if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
-		if (event->attr.config1 || event->attr.config2)
+		/* The metrics_clear can only be set for the slots event */
+		if (event->attr.config1 &&
+		    (!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR)))
+			return -EINVAL;
+
+		if (event->attr.config2)
 			return -EINVAL;

 		/*
@@ -4673,6 +4681,8 @@ PMU_FORMAT_ATTR(in_tx,  "config:32");
 PMU_FORMAT_ATTR(in_tx_cp, "config:33");
 PMU_FORMAT_ATTR(eq,	"config:36"); /* v6 + */

+PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+
 static ssize_t umask2_show(struct device *dev,
 			   struct device_attribute *attr,
 			   char *page)
@@ -4692,6 +4702,7 @@ static struct device_attribute format_attr_umask2 =
 static struct attribute *format_evtsel_ext_attrs[] = {
 	&format_attr_umask2.attr,
 	&format_attr_eq.attr,
+	&format_attr_metrics_clear.attr,
 	NULL
 };

@@ -4716,6 +4727,13 @@ evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 	if (i == 1)
 		return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;

+	/* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+	if (i == 2) {
+		union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap);
+
+		return intel_cap.rdpmc_metrics_clear ? attr->mode : 0;
+	}
+
 	return 0;
 }

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 3d64ed240e91..9d1d5adec0ad 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -624,6 +624,7 @@ union perf_capabilities {
 		u64	pebs_output_pt_available:1;
 		u64	pebs_timing_info:1;
 		u64	anythread_deprecated:1;
+		u64	rdpmc_metrics_clear:1;
 	};
 	u64	capabilities;
 };
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 709746cd7c19..21e1d1fe5972 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -41,6 +41,7 @@
 #define INTEL_FIXED_0_USER		(1ULL << 1)
 #define INTEL_FIXED_0_ANYTHREAD		(1ULL << 2)
 #define INTEL_FIXED_0_ENABLE_PMI	(1ULL << 3)
+#define INTEL_FIXED_3_METRICS_CLEAR	(1ULL << 2)

 #define HSW_IN_TX			(1ULL << 32)
 #define HSW_IN_TX_CHECKPOINTED		(1ULL << 33)
@@ -378,6 +379,9 @@ static inline bool use_fixed_pseudo_encoding(u64 code)
 #define INTEL_TD_METRIC_MAX		INTEL_TD_METRIC_MEM_BOUND
 #define INTEL_TD_METRIC_NUM		8

+#define INTEL_TD_CFG_METRIC_CLEAR_BIT	0
+#define INTEL_TD_CFG_METRIC_CLEAR	BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT)
+
 static inline bool is_metric_idx(int idx)
 {
 	return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt
index ae0aee86844f..f36c8ca1dc53 100644
--- a/tools/perf/Documentation/topdown.txt
+++ b/tools/perf/Documentation/topdown.txt
@@ -280,8 +280,13 @@ with no longer interval than a few seconds

 	perf stat -I 1000 --topdown ...

-For user programs using RDPMC directly the counter can
-be reset explicitly using ioctl:
+Starting from the Lunar Lake p-core, a RDPMC metrics clear mode is
+introduced. The metrics and the fixed counter 3 are automatically
+cleared after the read is performed. It is recommended to always enable
+the mode. To enable the mode, the config1 of slots event is set to 1.
+
+On the previous platforms, for user programs using RDPMC directly, the
+counter has to be reset explicitly using ioctl:

 	ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0);

-- 
2.35.1
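To close the series out, a hedged user-space sketch of opening the slots
event with the new format bit set. The raw event encoding for slots and
the fd plumbing are illustrative assumptions, not taken from the patch;
in sysfs terms the bit corresponds to the new metrics_clear format
(config1:0):

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int open_slots_with_metrics_clear(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.type    = PERF_TYPE_RAW;
		attr.size    = sizeof(attr);
		attr.config  = 0x0400;	/* TOPDOWN.SLOTS encoding (assumed) */
		attr.config1 = 1;	/* metrics_clear, config1:0 */

		/* measure the calling process on any CPU */
		return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	}

With metrics clear mode enabled, a ring-3 RDPMC reader no longer needs the
PERF_EVENT_IOC_RESET ioctl after each read, since the hardware clears
PERF_METRICS and fixed counter 3 automatically.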