From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V3 01/13] perf/x86/intel: Support the PEBS event mask
Date: Wed, 26 Jun 2024 07:35:33 -0700
Message-Id: <20240626143545.480761-2-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

From: Kan Liang <kan.liang@linux.intel.com>

The current perf code assumes that the counters which support PEBS are
contiguous. That is no longer guaranteed once counters are enumerated
through the new leaf 0x23: the enumeration is a counter mask, and the
mask may contain holes on future platforms or in a virtualization
environment.

Store the PEBS event mask, rather than the maximum number of PEBS
counters, in the x86 PMU structures.

Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
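
A stand-alone sketch of the idea, not part of the patch itself: the
helper names pebs_mask() and max_num_pebs() below are hypothetical
user-space counterparts of the new intel_pmu_pebs_mask() and
intel_pmu_max_num_pebs() helpers, with the kernel's GENMASK_ULL() and
find_last_bit() open-coded via GCC builtins.

/* Sketch only (not kernel code): why a mask replaces a plain count. */
#include <stdio.h>
#include <stdint.h>

#define MAX_PEBS_EVENTS		32
#define MAX_PEBS_EVENTS_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)

/* Counterpart of intel_pmu_pebs_mask(): clip a counter mask to the
 * PEBS-capable range. */
static uint64_t pebs_mask(uint64_t cntr_mask)
{
	return MAX_PEBS_EVENTS_MASK & cntr_mask;
}

/* Counterpart of intel_pmu_max_num_pebs(): highest set bit plus one. */
static int max_num_pebs(uint64_t pebs_events_mask)
{
	return pebs_events_mask ? 64 - __builtin_clzll(pebs_events_mask) : 0;
}

int main(void)
{
	uint64_t mask = pebs_mask(0x3b);	/* counters 0,1,3,4,5: hole at 2 */

	/* With a plain count (5 counters here), a "for (i = 0; i < 5; i++)"
	 * loop would touch the hole at bit 2 and never reach counter 5;
	 * the mask keeps counter 5 reachable and skips the hole. */
	printf("pebs_events_mask=%#llx, scan limit=%d\n",
	       (unsigned long long)mask, max_num_pebs(mask));
	return 0;
}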
---
 arch/x86/events/intel/core.c    |  8 ++++----
 arch/x86/events/intel/ds.c      | 15 ++++++++-------
 arch/x86/events/perf_event.h    | 14 ++++++++++++--
 arch/x86/include/asm/intel_ds.h |  1 +
 4 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0e835dc134a5..6e2e3638a690 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4728,7 +4728,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
 {
	intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
				     &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
-	pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+	pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
	pmu->unconstrained = (struct event_constraint)
			     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
						0, pmu->num_counters, 0, 0);
@@ -6070,7 +6070,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)

		pmu->num_counters = x86_pmu.num_counters;
		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
-		pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
		pmu->unconstrained = (struct event_constraint)
				     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
							0, pmu->num_counters, 0, 0);
@@ -6193,7 +6193,7 @@ __init int intel_pmu_init(void)
	x86_pmu.events_maskl		= ebx.full;
	x86_pmu.events_mask_len		= eax.split.mask_length;

-	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+	x86_pmu.pebs_events_mask	= intel_pmu_pebs_mask(GENMASK_ULL(x86_pmu.num_counters - 1, 0));
	x86_pmu.pebs_capable		= PEBS_COUNTER_MASK;

	/*
@@ -6822,7 +6822,7 @@ __init int intel_pmu_init(void)
			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
		}

-		pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
		pmu->unconstrained = (struct event_constraint)
				     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
							0, pmu->num_counters, 0, 0);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index e010bfed8417..f6105b8dcf87 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1137,7 +1137,7 @@ void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sche
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
	struct debug_store *ds = cpuc->ds;
-	int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
+	int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
	u64 threshold;
	int reserved;
@@ -2157,6 +2157,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
	void *base, *at, *top;
	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+	int max_pebs_events = intel_pmu_max_num_pebs(NULL);
	int bit, i, size;
	u64 mask;

@@ -2168,8 +2169,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d

	ds->pebs_index = ds->pebs_buffer_base;

-	mask = (1ULL << x86_pmu.max_pebs_events) - 1;
-	size = x86_pmu.max_pebs_events;
+	mask = x86_pmu.pebs_events_mask;
+	size = max_pebs_events;
	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
@@ -2208,8 +2209,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
		pebs_status = p->status = cpuc->pebs_enabled;

		bit = find_first_bit((unsigned long *)&pebs_status,
-				     x86_pmu.max_pebs_events);
-		if (bit >= x86_pmu.max_pebs_events)
+				     max_pebs_events);
+
+		if (!(x86_pmu.pebs_events_mask & (1 << bit)))
			continue;

		/*
@@ -2267,7 +2269,6 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 {
	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event;
@@ -2283,7 +2284,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d

	ds->pebs_index = ds->pebs_buffer_base;

-	mask = ((1ULL << max_pebs_events) - 1) |
+	mask = hybrid(cpuc->pmu, pebs_events_mask) |
	       (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
	size = INTEL_PMC_IDX_FIXED + num_counters_fixed;

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 72b022a1e16c..0e411539f88a 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -684,7 +684,7 @@ struct x86_hybrid_pmu {
	cpumask_t			supported_cpus;
	union perf_capabilities		intel_cap;
	u64				intel_ctrl;
-	int				max_pebs_events;
+	u64				pebs_events_mask;
	int				num_counters;
	int				num_counters_fixed;
	struct event_constraint		unconstrained;
@@ -852,7 +852,7 @@ struct x86_pmu {
			pebs_ept		:1;
	int		pebs_record_size;
	int		pebs_buffer_size;
-	int		max_pebs_events;
+	u64		pebs_events_mask;
	void		(*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
	struct event_constraint *pebs_constraints;
	void		(*pebs_aliases)(struct perf_event *event);
@@ -1661,6 +1661,16 @@ static inline int is_ht_workaround_enabled(void)
	return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
 }

+static inline u64 intel_pmu_pebs_mask(u64 cntr_mask)
+{
+	return MAX_PEBS_EVENTS_MASK & cntr_mask;
+}
+
+static inline int intel_pmu_max_num_pebs(struct pmu *pmu)
+{
+	return find_last_bit((unsigned long *)&hybrid(pmu, pebs_events_mask), MAX_PEBS_EVENTS) + 1;
+}
+
 #else /* CONFIG_CPU_SUP_INTEL */

 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 2f9eeb5c3069..5dbeac48a5b9 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -9,6 +9,7 @@
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS_FMT4	8
 #define MAX_PEBS_EVENTS		32
+#define MAX_PEBS_EVENTS_MASK	GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
 #define MAX_FIXED_PEBS_EVENTS	16

 /*
-- 
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang, Sandipan Das,
	Ravi Bangoria, silviazhao
Subject: [PATCH V3 02/13] perf/x86: Support counter mask
Date: Wed, 26 Jun 2024 07:35:34 -0700
Message-Id: <20240626143545.480761-3-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

From: Kan Liang <kan.liang@linux.intel.com>

The current perf code assumes that both GP and fixed counters are
contiguous. That is not guaranteed on newer Intel platforms or in a
virtualization environment.

Use a counter mask, rather than the number of counters, for both the
GP and the fixed counters. For the other architectures and older
platforms which don't enumerate a counter mask, use
GENMASK_ULL(num_counter - 1, 0) as the mask; there is no functional
change for them.

The interface to KVM is not changed: the number of counters is still
passed to KVM. It can be updated separately later.

Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
Cc: Sandipan Das
Cc: Ravi Bangoria
Cc: silviazhao
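
A stand-alone sketch, not part of the patch: it contrasts the old
count-based loop with walking the set bits of a counter mask, which is
what the for_each_set_bit() conversions in this patch do. The sparse
mask value is hypothetical, and for_each_set_bit() is open-coded with
GCC builtins.

/* Sketch only (not kernel code): count-based vs. mask-based loops. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cntr_mask = 0xfd;	/* hypothetical: counter 1 missing */
	int n = __builtin_popcountll(cntr_mask);

	/* Old style: assumes counters 0..n-1 all exist. Here it would
	 * touch the nonexistent counter 1 and never reach counter 7. */
	for (int i = 0; i < n; i++)
		printf("count-based: counter %d\n", i);

	/* New style: visits exactly the counters the mask enumerates. */
	for (uint64_t m = cntr_mask; m; m &= m - 1)
		printf("mask-based:  counter %d\n", __builtin_ctzll(m));
	return 0;
}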
---
 arch/x86/events/amd/core.c     |  24 ++---
 arch/x86/events/core.c         |  98 ++++++++++----------
 arch/x86/events/intel/core.c   | 164 ++++++++++++++++-----------------
 arch/x86/events/intel/ds.c     |  19 ++--
 arch/x86/events/intel/knc.c    |   2 +-
 arch/x86/events/intel/p4.c     |  10 +-
 arch/x86/events/intel/p6.c     |   2 +-
 arch/x86/events/perf_event.h   |  47 ++++++++--
 arch/x86/events/zhaoxin/core.c |  12 +--
 9 files changed, 199 insertions(+), 179 deletions(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 18bfe3451f3a..920e3a640cad 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -432,7 +432,7 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
	 * be removed on one CPU at a time AND PMU is disabled
	 * when we come here
	 */
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct perf_event *tmp = event;

		if (try_cmpxchg(nb->owners + i, &tmp, NULL))
@@ -501,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
-	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+	for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
		if (new == -1 || hwc->idx == idx)
			/* assign free slot, prefer hwc->idx */
			old = cmpxchg(nb->owners + idx, NULL, event);
@@ -544,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
	/*
	 * initialize all possible NB constraints
	 */
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
@@ -737,7 +737,7 @@ static void amd_pmu_check_overflow(void)
	 * counters are always enabled when this function is called and
	 * ARCH_PERFMON_EVENTSEL_INT is always set.
	 */
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

@@ -757,7 +757,7 @@ static void amd_pmu_enable_all(int added)

	amd_brs_enable_all();

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		/* only activate events which are marked as active */
		if (!test_bit(idx, cpuc->active_mask))
			continue;
@@ -980,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
	/* Clear any reserved bits set by buggy microcode */
	status &= amd_pmu_global_cntr_mask;

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

@@ -1315,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = {
	.addr_offset		= amd_pmu_addr_offset,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= AMD64_NUM_COUNTERS,
+	.cntr_mask64		= GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
	.add			= amd_pmu_add_event,
	.del			= amd_pmu_del_event,
	.cntval_bits		= 48,
@@ -1414,7 +1414,7 @@ static int __init amd_core_pmu_init(void)
	 */
	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
-	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
+	x86_pmu.cntr_mask64	= GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);

	/* Check for Performance Monitoring v2 support */
	if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1424,9 +1424,9 @@ static int __init amd_core_pmu_init(void)
		x86_pmu.version = 2;

		/* Find the number of available Core PMCs */
-		x86_pmu.num_counters = ebx.split.num_core_pmc;
+		x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);

-		amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+		amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;

		/* Update PMC handling functions */
		x86_pmu.enable_all = amd_pmu_v2_enable_all;
@@ -1454,12 +1454,12 @@ static int __init amd_core_pmu_init(void)
	 * even numbered counter that has a consecutive adjacent odd
	 * numbered counter following it.
	 */
-	for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+	for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
		even_ctr_mask |= BIT_ULL(i);

	pair_constraint = (struct event_constraint)
			  __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
-					     x86_pmu.num_counters / 2, 0,
+					     x86_pmu_max_num_counters(NULL) / 2, 0,
					     PERF_X86_EVENT_PAIR);

	x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index acd367c45334..0c51cfdf7609 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -189,29 +189,31 @@ static DEFINE_MUTEX(pmc_reserve_mutex);

 #ifdef CONFIG_X86_LOCAL_APIC

-static inline int get_possible_num_counters(void)
+static inline u64 get_possible_counter_mask(void)
 {
-	int i, num_counters = x86_pmu.num_counters;
+	u64 cntr_mask = x86_pmu.cntr_mask64;
+	int i;

	if (!is_hybrid())
-		return num_counters;
+		return cntr_mask;

	for (i = 0; i < x86_pmu.num_hybrid_pmus; i++)
-		num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters);
+		cntr_mask |= x86_pmu.hybrid_pmu[i].cntr_mask64;

-	return num_counters;
+	return cntr_mask;
 }

 static bool reserve_pmc_hardware(void)
 {
-	int i, num_counters = get_possible_num_counters();
+	u64 cntr_mask = get_possible_counter_mask();
+	int i, end;

-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
		if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
			goto perfctr_fail;
	}

-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
		if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
			goto eventsel_fail;
	}
@@ -219,13 +221,14 @@ static bool reserve_pmc_hardware(void)
	return true;

 eventsel_fail:
-	for (i--; i >= 0; i--)
+	end = i;
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
		release_evntsel_nmi(x86_pmu_config_addr(i));
-
-	i = num_counters;
+	i = X86_PMC_IDX_MAX;

 perfctr_fail:
-	for (i--; i >= 0; i--)
+	end = i;
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
		release_perfctr_nmi(x86_pmu_event_addr(i));

	return false;
@@ -233,9 +236,10 @@ static bool reserve_pmc_hardware(void)

 static void release_pmc_hardware(void)
 {
-	int i, num_counters = get_possible_num_counters();
+	u64 cntr_mask = get_possible_counter_mask();
+	int i;

-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
		release_perfctr_nmi(x86_pmu_event_addr(i));
		release_evntsel_nmi(x86_pmu_config_addr(i));
	}
@@ -248,7 +252,8 @@ static void release_pmc_hardware(void) {}

 #endif

-bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
+bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
+		     unsigned long *fixed_cntr_mask)
 {
	u64 val, val_fail = -1, val_new= ~0;
	int i, reg, reg_fail = -1, ret = 0;
@@ -259,7 +264,7 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
	 * Check to see if the BIOS enabled any of the counters, if so
	 * complain and bail.
	 */
-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
		reg = x86_pmu_config_addr(i);
		ret = rdmsrl_safe(reg, &val);
		if (ret)
@@ -273,12 +278,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
		}
	}

-	if (num_counters_fixed) {
+	if (*(u64 *)fixed_cntr_mask) {
		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		ret = rdmsrl_safe(reg, &val);
		if (ret)
			goto msr_fail;
-		for (i = 0; i < num_counters_fixed; i++) {
+		for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
			if (fixed_counter_disabled(i, pmu))
				continue;
			if (val & (0x03ULL << i*4)) {
@@ -679,7 +684,7 @@ void x86_pmu_disable_all(void)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
		u64 val;

@@ -736,7 +741,7 @@ void x86_pmu_enable_all(int added)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

		if (!test_bit(idx, cpuc->active_mask))
@@ -975,7 +980,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events);

 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	int num_counters = hybrid(cpuc->pmu, num_counters);
	struct event_constraint *c;
	struct perf_event *e;
	int n0, i, wmin, wmax, unsched = 0;
@@ -1051,7 +1055,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)

	/* slow path */
	if (i != n) {
-		int gpmax = num_counters;
+		int gpmax = x86_pmu_max_num_counters(cpuc->pmu);

		/*
		 * Do not allow scheduling of more than half the available
@@ -1072,7 +1076,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
		 * the extra Merge events needed by large increment events.
		 */
		if (x86_pmu.flags & PMU_FL_PAIR) {
-			gpmax = num_counters - cpuc->n_pair;
+			gpmax -= cpuc->n_pair;
			WARN_ON(gpmax <= 0);
		}

@@ -1157,12 +1161,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
  */
 static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
 {
-	int num_counters = hybrid(cpuc->pmu, num_counters);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
	struct perf_event *event;
	int n, max_count;

-	max_count = num_counters + num_counters_fixed;
+	max_count = x86_pmu_num_counters(cpuc->pmu) + x86_pmu_num_counters_fixed(cpuc->pmu);

	/* current number of events already accepted */
	n = cpuc->n_events;
@@ -1522,13 +1524,13 @@ void perf_event_print_debug(void)
	u64 pebs, debugctl;
	int cpu = smp_processor_id();
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-	int num_counters = hybrid(cpuc->pmu, num_counters);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
+	unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+	unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
	struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
	unsigned long flags;
	int idx;

-	if (!num_counters)
+	if (!*(u64 *)cntr_mask)
		return;

	local_irq_save(flags);
@@ -1555,7 +1557,7 @@ void perf_event_print_debug(void)
	}
	pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);

-	for (idx = 0; idx < num_counters; idx++) {
+	for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
		rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
		rdmsrl(x86_pmu_event_addr(idx), pmc_count);

@@ -1568,7 +1570,7 @@ void perf_event_print_debug(void)
		pr_info("CPU#%d: gen-PMC%d left:  %016llx\n",
			cpu, idx, prev_left);
	}
-	for (idx = 0; idx < num_counters_fixed; idx++) {
+	for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
		if (fixed_counter_disabled(idx, cpuc->pmu))
			continue;
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
@@ -1682,7 +1684,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

@@ -2038,18 +2040,15 @@ static void _x86_pmu_read(struct perf_event *event)
	static_call(x86_pmu_update)(event);
 }

-void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
-			  u64 intel_ctrl)
+void x86_pmu_show_pmu_cap(struct pmu *pmu)
 {
	pr_info("... version:                %d\n",     x86_pmu.version);
	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
-	pr_info("... generic registers:      %d\n",     num_counters);
+	pr_info("... generic registers:      %d\n",     x86_pmu_num_counters(pmu));
	pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose events:   %lu\n",
-			hweight64((((1ULL << num_counters_fixed) - 1)
-					<< INTEL_PMC_IDX_FIXED) & intel_ctrl));
-	pr_info("... event mask:             %016Lx\n", intel_ctrl);
+	pr_info("... fixed-purpose events:   %d\n",     x86_pmu_num_counters_fixed(pmu));
+	pr_info("... event mask:             %016Lx\n", hybrid(pmu, intel_ctrl));
 }

 static int __init init_hw_perf_events(void)
@@ -2086,7 +2085,7 @@ static int __init init_hw_perf_events(void)
	pmu_check_apic();

	/* sanity check that the hardware exists or is emulated */
-	if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
+	if (!check_hw_exists(&pmu, x86_pmu.cntr_mask, x86_pmu.fixed_cntr_mask))
		goto out_bad_pmu;

	pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -2097,14 +2096,14 @@ static int __init init_hw_perf_events(void)
		quirk->func();

	if (!x86_pmu.intel_ctrl)
-		x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+		x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;

	perf_events_lapic_init();
	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");

	unconstrained = (struct event_constraint)
-		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters, 0, 0);
+		__EVENT_CONSTRAINT(0, x86_pmu.cntr_mask64,
+				   0, x86_pmu_num_counters(NULL), 0, 0);

	x86_pmu_format_group.attrs = x86_pmu.format_attrs;

@@ -2113,11 +2112,8 @@ static int __init init_hw_perf_events(void)

	pmu.attr_update = x86_pmu.attr_update;

-	if (!is_hybrid()) {
-		x86_pmu_show_pmu_cap(x86_pmu.num_counters,
-				     x86_pmu.num_counters_fixed,
-				     x86_pmu.intel_ctrl);
-	}
+	if (!is_hybrid())
+		x86_pmu_show_pmu_cap(NULL);

	if (!x86_pmu.read)
		x86_pmu.read = _x86_pmu_read;
@@ -2481,7 +2477,7 @@ void perf_clear_dirty_counters(void)
	for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
		if (i >= INTEL_PMC_IDX_FIXED) {
			/* Metrics and fake events don't have corresponding HW counters. */
-			if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+			if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
				continue;

			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
@@ -2986,8 +2982,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
	 * base PMU holds the correct number of counters for P-cores.
	 */
	cap->version		= x86_pmu.version;
-	cap->num_counters_gp	= x86_pmu.num_counters;
-	cap->num_counters_fixed	= x86_pmu.num_counters_fixed;
+	cap->num_counters_gp	= x86_pmu_num_counters(NULL);
+	cap->num_counters_fixed	= x86_pmu_num_counters_fixed(NULL);
	cap->bit_width_gp	= x86_pmu.cntval_bits;
	cap->bit_width_fixed	= x86_pmu.cntval_bits;
	cap->events_mask	= (unsigned int)x86_pmu.events_maskl;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 6e2e3638a690..aaf3884073ba 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2874,23 +2874,23 @@ static void intel_pmu_reset(void)
 {
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
-	int num_counters = hybrid(cpuc->pmu, num_counters);
+	unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+	unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
	unsigned long flags;
	int idx;

-	if (!num_counters)
+	if (!*(u64 *)cntr_mask)
		return;

	local_irq_save(flags);

	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());

-	for (idx = 0; idx < num_counters; idx++) {
+	for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) {
		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
		wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
	}
-	for (idx = 0; idx < num_counters_fixed; idx++) {
+	for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
		if (fixed_counter_disabled(idx, cpuc->pmu))
			continue;
		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
@@ -2940,8 +2940,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
	    !guest_pebs_idxs)
		return;

-	for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
-			 INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
+	for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) {
		event = cpuc->events[bit];
		if (!event->attr.precise_ip)
			continue;
@@ -4199,7 +4198,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
	int idx;

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[idx];

		arr[idx].msr = x86_pmu_config_addr(idx);
@@ -4217,7 +4216,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
			arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
	}

-	*nr = x86_pmu.num_counters;
+	*nr = x86_pmu_max_num_counters(cpuc->pmu);
	return arr;
 }

@@ -4232,7 +4231,7 @@ static void core_pmu_enable_all(int added)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

		if (!test_bit(idx, cpuc->active_mask) ||
@@ -4684,13 +4683,33 @@ static void flip_smm_bit(void *data)
	}
 }

-static void intel_pmu_check_num_counters(int *num_counters,
-					 int *num_counters_fixed,
-					 u64 *intel_ctrl, u64 fixed_mask);
+static void intel_pmu_check_counters_mask(unsigned long *cntr_mask,
+					  unsigned long *fixed_cntr_mask,
+					  u64 *intel_ctrl)
+{
+	unsigned int bit;
+
+	bit = find_last_bit(cntr_mask, X86_PMC_IDX_MAX) + 1;
+	if (bit > INTEL_PMC_MAX_GENERIC) {
+		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+		     bit, INTEL_PMC_MAX_GENERIC);
+		*cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
+	}
+	*intel_ctrl = *cntr_mask;
+
+	bit = find_last_bit(fixed_cntr_mask, X86_PMC_IDX_MAX) + 1;
+	if (bit > INTEL_PMC_MAX_FIXED) {
+		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+		     bit, INTEL_PMC_MAX_FIXED);
+		*fixed_cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
+	}
+
+	*intel_ctrl |= (u64)*fixed_cntr_mask << INTEL_PMC_IDX_FIXED;
+}

 static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
-					      int num_counters,
-					      int num_counters_fixed,
+					      u64 cntr_mask,
+					      u64 fixed_cntr_mask,
					      u64 intel_ctrl);

 static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
@@ -4713,11 +4732,10 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
	if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
			    &eax, &ebx, &ecx, &edx);
-		pmu->num_counters = fls(eax);
-		pmu->num_counters_fixed = fls(ebx);
+		pmu->cntr_mask64 = eax;
+		pmu->fixed_cntr_mask64 = ebx;
	}

-
	if (!intel_pmu_broken_perf_cap()) {
		/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
		rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
@@ -4726,12 +4744,12 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)

 static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
 {
-	intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
-				     &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
-	pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
+	intel_pmu_check_counters_mask(pmu->cntr_mask, pmu->fixed_cntr_mask,
+				      &pmu->intel_ctrl);
+	pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
	pmu->unconstrained = (struct event_constraint)
-			     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-						0, pmu->num_counters, 0, 0);
+			     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+						0, x86_pmu_num_counters(&pmu->pmu), 0, 0);

	if (pmu->intel_cap.perf_metrics)
		pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
@@ -4744,8 +4762,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
		pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;

	intel_pmu_check_event_constraints(pmu->event_constraints,
-					  pmu->num_counters,
-					  pmu->num_counters_fixed,
+					  pmu->cntr_mask64,
+					  pmu->fixed_cntr_mask64,
					  pmu->intel_ctrl);

	intel_pmu_check_extra_regs(pmu->extra_regs);
@@ -4806,7 +4824,7 @@ static bool init_hybrid_pmu(int cpu)

	intel_pmu_check_hybrid_pmus(pmu);

-	if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
+	if (!check_hw_exists(&pmu->pmu, pmu->cntr_mask, pmu->fixed_cntr_mask))
		return false;

	pr_info("%s PMU driver: ", pmu->name);
@@ -4816,8 +4834,7 @@ static bool init_hybrid_pmu(int cpu)

	pr_cont("\n");

-	x86_pmu_show_pmu_cap(pmu->num_counters, pmu->num_counters_fixed,
-			     pmu->intel_ctrl);
+	x86_pmu_show_pmu_cap(&pmu->pmu);

 end:
	cpumask_set_cpu(cpu, &pmu->supported_cpus);
@@ -5955,29 +5972,9 @@ static const struct attribute_group *hybrid_attr_update[] = {

 static struct attribute *empty_attrs;

-static void intel_pmu_check_num_counters(int *num_counters,
-					 int *num_counters_fixed,
-					 u64 *intel_ctrl, u64 fixed_mask)
-{
-	if (*num_counters > INTEL_PMC_MAX_GENERIC) {
-		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-		     *num_counters, INTEL_PMC_MAX_GENERIC);
-		*num_counters = INTEL_PMC_MAX_GENERIC;
-	}
-	*intel_ctrl = (1ULL << *num_counters) - 1;
-
-	if (*num_counters_fixed > INTEL_PMC_MAX_FIXED) {
-		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-		     *num_counters_fixed, INTEL_PMC_MAX_FIXED);
-		*num_counters_fixed = INTEL_PMC_MAX_FIXED;
-	}
-
-	*intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED;
-}
-
 static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
-					      int num_counters,
-					      int num_counters_fixed,
+					      u64 cntr_mask,
+					      u64 fixed_cntr_mask,
					      u64 intel_ctrl)
 {
	struct event_constraint *c;
@@ -6014,10 +6011,9 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
			 * generic counters
			 */
			if (!use_fixed_pseudo_encoding(c->code))
-				c->idxmsk64 |= (1ULL << num_counters) - 1;
+				c->idxmsk64 |= cntr_mask;
		}
-		c->idxmsk64 &=
-			~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed));
+		c->idxmsk64 &= cntr_mask | (fixed_cntr_mask << INTEL_PMC_IDX_FIXED);
		c->weight = hweight64(c->idxmsk64);
	}
 }
@@ -6068,12 +6064,12 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
		pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id;
		pmu->name = intel_hybrid_pmu_type_map[bit].name;

-		pmu->num_counters = x86_pmu.num_counters;
-		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
-		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
+		pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+		pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
+		pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
		pmu->unconstrained = (struct event_constraint)
-				     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-							0, pmu->num_counters, 0, 0);
+				     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+							0, x86_pmu_num_counters(&pmu->pmu), 0, 0);

		pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
		if (pmu->pmu_type & hybrid_small) {
@@ -6186,14 +6182,14 @@ __init int intel_pmu_init(void)
		x86_pmu = intel_pmu;

	x86_pmu.version			= version;
-	x86_pmu.num_counters		= eax.split.num_counters;
+	x86_pmu.cntr_mask64		= GENMASK_ULL(eax.split.num_counters - 1, 0);
	x86_pmu.cntval_bits		= eax.split.bit_width;
	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;

	x86_pmu.events_maskl		= ebx.full;
	x86_pmu.events_mask_len		= eax.split.mask_length;

-	x86_pmu.pebs_events_mask	= intel_pmu_pebs_mask(GENMASK_ULL(x86_pmu.num_counters - 1, 0));
+	x86_pmu.pebs_events_mask	= intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
	x86_pmu.pebs_capable		= PEBS_COUNTER_MASK;

	/*
@@ -6203,12 +6199,10 @@ __init int intel_pmu_init(void)
	if (version > 1 && version < 5) {
		int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);

-		x86_pmu.num_counters_fixed =
-			max((int)edx.split.num_counters_fixed, assume);
-
-		fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
+		x86_pmu.fixed_cntr_mask64 =
+			GENMASK_ULL(max((int)edx.split.num_counters_fixed, assume) - 1, 0);
	} else if (version >= 5)
-		x86_pmu.num_counters_fixed = fls(fixed_mask);
+		x86_pmu.fixed_cntr_mask64 = fixed_mask;

	if (boot_cpu_has(X86_FEATURE_PDCM)) {
		u64 capabilities;
@@ -6803,11 +6797,13 @@ __init int intel_pmu_init(void)
		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
		intel_pmu_init_glc(&pmu->pmu);
		if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
-			pmu->num_counters = x86_pmu.num_counters + 2;
-			pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
+			pmu->cntr_mask64 <<= 2;
+			pmu->cntr_mask64 |= 0x3;
+			pmu->fixed_cntr_mask64 <<= 1;
+			pmu->fixed_cntr_mask64 |= 0x1;
		} else {
-			pmu->num_counters = x86_pmu.num_counters;
-			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+			pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+			pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
		}

		/*
@@ -6817,15 +6813,16 @@ __init int intel_pmu_init(void)
		 * mistakenly add extra counters for P-cores. Correct the number of
		 * counters here.
		 */
-		if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
-			pmu->num_counters = x86_pmu.num_counters;
-			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+		if ((x86_pmu_num_counters(&pmu->pmu) > 8) || (x86_pmu_num_counters_fixed(&pmu->pmu) > 4)) {
+			pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+			pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
		}

-		pmu->max_pebs_events = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
+		pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
		pmu->unconstrained = (struct event_constraint)
-				     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-							0, pmu->num_counters, 0, 0);
+				     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+							0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
+
		pmu->extra_regs = intel_glc_extra_regs;

		/* Initialize Atom core specific PerfMon capabilities.*/
@@ -6892,9 +6889,9 @@ __init int intel_pmu_init(void)
			 * The constraints may be cut according to the CPUID enumeration
			 * by inserting the EVENT_CONSTRAINT_END.
			 */
-			if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED)
-				x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
-			intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1;
+			if (find_last_bit(x86_pmu.fixed_cntr_mask, X86_PMC_IDX_MAX) > INTEL_PMC_MAX_FIXED)
+				x86_pmu.fixed_cntr_mask64 &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
+			intel_v5_gen_event_constraints[find_last_bit(x86_pmu.fixed_cntr_mask, INTEL_PMC_MAX_FIXED) + 1].weight = -1;
			x86_pmu.event_constraints = intel_v5_gen_event_constraints;
			pr_cont("generic architected perfmon, ");
			name = "generic_arch_v5+";
@@ -6921,18 +6918,17 @@ __init int intel_pmu_init(void)
		x86_pmu.attr_update = hybrid_attr_update;
	}

-	intel_pmu_check_num_counters(&x86_pmu.num_counters,
-				     &x86_pmu.num_counters_fixed,
-				     &x86_pmu.intel_ctrl,
-				     (u64)fixed_mask);
+	intel_pmu_check_counters_mask(x86_pmu.cntr_mask,
+				      x86_pmu.fixed_cntr_mask,
+				      &x86_pmu.intel_ctrl);

	/* AnyThread may be deprecated on arch perfmon v5 or later */
	if (x86_pmu.intel_cap.anythread_deprecated)
		x86_pmu.format_attrs = intel_arch_formats_attr;

	intel_pmu_check_event_constraints(x86_pmu.event_constraints,
-					  x86_pmu.num_counters,
-					  x86_pmu.num_counters_fixed,
+					  x86_pmu.cntr_mask64,
+					  x86_pmu.fixed_cntr_mask64,
					  x86_pmu.intel_ctrl);
	/*
	 * Access LBR MSR may cause #GP under certain circumstances.
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index f6105b8dcf87..6f834a7d852a 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1138,7 +1138,6 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
	struct debug_store *ds = cpuc->ds;
	int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
	u64 threshold;
	int reserved;

@@ -1146,7 +1145,7 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
		return;

	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
-		reserved = max_pebs_events + num_counters_fixed;
+		reserved = max_pebs_events + x86_pmu_max_num_counters_fixed(cpuc->pmu);
	else
		reserved = max_pebs_events;

@@ -2172,8 +2171,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
	mask = x86_pmu.pebs_events_mask;
	size = max_pebs_events;
	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
-		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
-		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
+		mask |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
+		size = INTEL_PMC_IDX_FIXED + x86_pmu_max_num_counters_fixed(NULL);
	}

	if (unlikely(base >= top)) {
@@ -2269,11 +2268,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 {
	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event;
	void *base, *at, *top;
-	int bit, size;
+	int bit;
	u64 mask;

	if (!x86_pmu.pebs_active)
@@ -2285,11 +2283,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
	ds->pebs_index = ds->pebs_buffer_base;

	mask = hybrid(cpuc->pmu, pebs_events_mask) |
-	       (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
-	size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
+	       (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);

	if (unlikely(base >= top)) {
-		intel_pmu_pebs_event_update_no_drain(cpuc, size);
+		intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
		return;
	}

@@ -2299,11 +2296,11 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
		pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
		pebs_status &= mask;

-		for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
+		for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX)
			counts[bit]++;
	}

-	for_each_set_bit(bit, (unsigned long *)&mask, size) {
+	for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
		if (counts[bit] == 0)
			continue;

diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c
index 618001c208e8..034a1f6a457c 100644
--- a/arch/x86/events/intel/knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -303,7 +303,7 @@ static const struct x86_pmu knc_pmu __initconst = {
	.apic			= 1,
	.max_period		= (1ULL << 39) - 1,
	.version		= 0,
-	.num_counters		= 2,
+	.cntr_mask64		= 0x3,
	.cntval_bits		= 40,
	.cntval_mask		= (1ULL << 40) - 1,
	.get_event_constraints	= x86_get_event_constraints,
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index 35936188db01..844bc4fc4724 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -919,7 +919,7 @@ static void p4_pmu_disable_all(void)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
@@ -998,7 +998,7 @@ static void p4_pmu_enable_all(int added)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
@@ -1040,7 +1040,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)

	cpuc = this_cpu_ptr(&cpu_hw_events);

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		int overflow;

		if (!test_bit(idx, cpuc->active_mask)) {
@@ -1353,7 +1353,7 @@ static __initconst const struct x86_pmu p4_pmu = {
	 * though leave it restricted at moment assuming
	 * HT is on
	 */
-	.num_counters		= ARCH_P4_MAX_CCCR,
+	.cntr_mask64		= GENMASK_ULL(ARCH_P4_MAX_CCCR - 1, 0),
	.apic			= 1,
	.cntval_bits		= ARCH_P4_CNTRVAL_BITS,
	.cntval_mask		= ARCH_P4_CNTRVAL_MASK,
@@ -1395,7 +1395,7 @@ __init int p4_pmu_init(void)
	 *
	 * Solve this by zero'ing out the registers to mimic a reset.
	 */
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		reg = x86_pmu_config_addr(i);
		wrmsrl_safe(reg, 0ULL);
	}
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
index 408879b0c0d4..a6cffb4f4ef5 100644
--- a/arch/x86/events/intel/p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -214,7 +214,7 @@ static __initconst const struct x86_pmu p6_pmu = {
	.apic			= 1,
	.max_period		= (1ULL << 31) - 1,
	.version		= 0,
-	.num_counters		= 2,
+	.cntr_mask64		= 0x3,
	/*
	 * Events have 40 bits implemented. However they are designed such
	 * that bits [32-39] are sign extensions of bit 31. As such the
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 0e411539f88a..b3214d6e8f4c 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -685,8 +685,14 @@ struct x86_hybrid_pmu {
	union perf_capabilities		intel_cap;
	u64				intel_ctrl;
	u64				pebs_events_mask;
-	int				num_counters;
-	int				num_counters_fixed;
+	union {
+		u64		cntr_mask64;
+		unsigned long	cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
+	union {
+		u64		fixed_cntr_mask64;
+		unsigned long	fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
	struct event_constraint		unconstrained;

	u64				hw_cache_event_ids
@@ -774,8 +780,14 @@ struct x86_pmu {
	int		(*rdpmc_index)(int index);
	u64		(*event_map)(int);
	int		max_events;
-	int		num_counters;
-	int		num_counters_fixed;
+	union {
+		u64		cntr_mask64;
+		unsigned long	cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
+	union {
+		u64		fixed_cntr_mask64;
+		unsigned long	fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
	int		cntval_bits;
	u64		cntval_mask;
	union {
@@ -1125,8 +1137,8 @@ static inline int x86_pmu_rdpmc_index(int index)
	return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
 }

-bool check_hw_exists(struct pmu *pmu, int num_counters,
-		     int num_counters_fixed);
+bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
+		     unsigned long *fixed_cntr_mask);

 int x86_add_exclusive(unsigned int what);

@@ -1197,8 +1209,27 @@ void x86_pmu_enable_event(struct perf_event *event);

 int x86_pmu_handle_irq(struct pt_regs *regs);

-void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
-			  u64 intel_ctrl);
+void x86_pmu_show_pmu_cap(struct pmu *pmu);
+
+static inline int x86_pmu_num_counters(struct pmu *pmu)
+{
+	return hweight64(hybrid(pmu, cntr_mask64));
+}
+
+static inline int x86_pmu_max_num_counters(struct pmu *pmu)
+{
+	return find_last_bit(hybrid(pmu, cntr_mask), X86_PMC_IDX_MAX) + 1;
+}
+
+static inline int x86_pmu_num_counters_fixed(struct pmu *pmu)
+{
+	return hweight64(hybrid(pmu, fixed_cntr_mask64));
+}
+
+static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu)
+{
+	return find_last_bit(hybrid(pmu, fixed_cntr_mask), INTEL_PMC_MAX_FIXED) + 1;
+}

 extern struct event_constraint emptyconstraint;

diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
index 3e9acdaeed1e..2fd9b0cf9a5e 100644
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -530,13 +530,13 @@ __init int zhaoxin_pmu_init(void)
	pr_info("Version check pass!\n");

	x86_pmu.version			= version;
-	x86_pmu.num_counters		= eax.split.num_counters;
+	x86_pmu.cntr_mask64		= GENMASK_ULL(eax.split.num_counters - 1, 0);
	x86_pmu.cntval_bits		= eax.split.bit_width;
	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
	x86_pmu.events_maskl		= ebx.full;
	x86_pmu.events_mask_len		= eax.split.mask_length;

-	x86_pmu.num_counters_fixed	= edx.split.num_counters_fixed;
+	x86_pmu.fixed_cntr_mask64	= GENMASK_ULL(edx.split.num_counters_fixed - 1, 0);
	x86_add_quirk(zhaoxin_arch_events_quirk);

	switch (boot_cpu_data.x86) {
@@ -604,13 +604,13 @@ __init int zhaoxin_pmu_init(void)
		return -ENODEV;
	}

-	x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
-	x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+	x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
+	x86_pmu.intel_ctrl |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
-			c->weight += x86_pmu.num_counters;
+			c->idxmsk64 |= x86_pmu.cntr_mask64;
+			c->weight += x86_pmu_num_counters(NULL);
		}
	}

-- 
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V3 03/13] perf/x86: Add Lunar Lake and Arrow Lake support
Date: Wed, 26 Jun 2024 07:35:35 -0700
Message-Id: <20240626143545.480761-4-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

From: Kan Liang <kan.liang@linux.intel.com>

From the PMU's perspective, Lunar Lake and Arrow Lake are similar to
the previous generation Meteor Lake. Both are hybrid platforms, with
e-cores and p-cores. The key differences are:

- The e-core supports 3 new fixed counters.
- The p-core supports an updated PEBS Data Source format.
- More GP counters (updated event constraint table).
- New Architectural performance monitoring V6 (new Perfmon MSR
  aliasing, umask2, eq).
- New PEBS format V6 (Counters Snapshotting group).
- New RDPMC metrics clear mode.

The legacy features, the 3 new fixed counters, and the updated event
constraint table are enabled in this patch.
The new PEBS data source format, the architectural performance
monitoring V6, the PEBS format V6, and the new RDPMC metrics clear
mode are supported in the following patches.

Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
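
A stand-alone sketch, not part of the patch: the three new e-core
(Skymont) fixed counters land in the shared counter index space at bit
INTEL_PMC_IDX_FIXED + n, assuming INTEL_PMC_IDX_FIXED == 32 as in the
upstream perf headers; that bit is what FIXED_EVENT_CONSTRAINT(event, n)
encodes in its index mask for the intel_skt table below.

/* Sketch only (not kernel code): fixed counter n -> index bit 32 + n. */
#include <stdio.h>
#include <stdint.h>

#define INTEL_PMC_IDX_FIXED	32

/* Index mask a FIXED_EVENT_CONSTRAINT(event, n) pins its event to. */
static uint64_t fixed_idxmsk(int n)
{
	return 1ULL << (INTEL_PMC_IDX_FIXED + n);
}

int main(void)
{
	/* The three new Skymont fixed counters from the table below. */
	printf("TOPDOWN_BAD_SPECULATION.ALL -> fixed 4, idxmsk=%#llx\n",
	       (unsigned long long)fixed_idxmsk(4));
	printf("TOPDOWN_FE_BOUND.ALL        -> fixed 5, idxmsk=%#llx\n",
	       (unsigned long long)fixed_idxmsk(5));
	printf("TOPDOWN_RETIRING.ALL        -> fixed 6, idxmsk=%#llx\n",
	       (unsigned long long)fixed_idxmsk(6));
	return 0;
}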
+	 */
+	INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0x3ff),
+
+	EVENT_CONSTRAINT_END
+};
+
 EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
 EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
 EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
@@ -5790,6 +5850,23 @@ static struct attribute *adl_hybrid_events_attrs[] = {
 	NULL,
 };
 
+EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_lnl, "event=0xc2,umask=0x02;event=0x00,umask=0x80", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_lnl, "event=0x9c,umask=0x01;event=0x00,umask=0x82", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_lnl, "event=0xa4,umask=0x02;event=0x00,umask=0x83", hybrid_big_small);
+
+static struct attribute *lnl_hybrid_events_attrs[] = {
+	EVENT_PTR(slots_adl),
+	EVENT_PTR(td_retiring_lnl),
+	EVENT_PTR(td_bad_spec_adl),
+	EVENT_PTR(td_fe_bound_lnl),
+	EVENT_PTR(td_be_bound_lnl),
+	EVENT_PTR(td_heavy_ops_adl),
+	EVENT_PTR(td_br_mis_adl),
+	EVENT_PTR(td_fetch_lat_adl),
+	EVENT_PTR(td_mem_bound_adl),
+	NULL
+};
+
 /* Must be in IDX order */
 EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small);
 EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small);
@@ -6139,6 +6216,21 @@ static __always_inline void intel_pmu_init_grt(struct pmu *pmu)
 	intel_pmu_ref_cycles_ext();
 }
 
+static __always_inline void intel_pmu_init_lnc(struct pmu *pmu)
+{
+	intel_pmu_init_glc(pmu);
+	hybrid(pmu, event_constraints) = intel_lnc_event_constraints;
+	hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints;
+	hybrid(pmu, extra_regs) = intel_rwc_extra_regs;
+}
+
+static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
+{
+	intel_pmu_init_grt(pmu);
+	hybrid(pmu, event_constraints) = intel_skt_event_constraints;
+	hybrid(pmu, extra_regs) = intel_cmt_extra_regs;
+}
+
 __init int intel_pmu_init(void)
 {
 	struct attribute **extra_skl_attr = &empty_attrs;
@@ -6864,6 +6956,31 @@ __init int intel_pmu_init(void)
 		name = "meteorlake_hybrid";
 		break;
 
+	case INTEL_LUNARLAKE_M:
+	case INTEL_ARROWLAKE:
+		intel_pmu_init_hybrid(hybrid_big_small);
+
+		x86_pmu.get_event_constraints = mtl_get_event_constraints;
+		x86_pmu.hw_config = adl_hw_config;
+
+		td_attr = lnl_hybrid_events_attrs;
+		mem_attr = mtl_hybrid_mem_attrs;
+		tsx_attr = adl_hybrid_tsx_attrs;
+		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+			mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
+
+		/* Initialize big core specific PerfMon capabilities. */
+		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
+		intel_pmu_init_lnc(&pmu->pmu);
+
+		/* Initialize Atom core specific PerfMon capabilities. */
+		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
+		intel_pmu_init_skt(&pmu->pmu);
+
+		pr_cont("Lunarlake Hybrid events, ");
+		name = "lunarlake_hybrid";
+		break;
+
 	default:
 		switch (x86_pmu.version) {
 		case 1:
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 6f834a7d852a..79e23dec6714 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1086,6 +1086,30 @@ struct event_constraint intel_glc_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_lnc_pebs_event_constraints[] = {
+	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
+	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
+
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
+
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
+
+	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
+
+	/*
+	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
+	 * need the full constraints from the main table.
+	 */
+
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
 	struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index b3214d6e8f4c..3c781dabce76 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1582,6 +1582,8 @@ extern struct event_constraint intel_icl_pebs_event_constraints[];
 
 extern struct event_constraint intel_glc_pebs_event_constraints[];
 
+extern struct event_constraint intel_lnc_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_add(struct perf_event *event);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 7f1e17250546..400c909b8658 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -307,6 +307,10 @@ struct x86_pmu_capability {
 #define INTEL_PMC_IDX_FIXED_SLOTS	(INTEL_PMC_IDX_FIXED + 3)
 #define INTEL_PMC_MSK_FIXED_SLOTS	(1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
 
+/* TOPDOWN_BAD_SPECULATION.ALL: fixed counter 4 (Atom only) */
+/* TOPDOWN_FE_BOUND.ALL: fixed counter 5 (Atom only) */
+/* TOPDOWN_RETIRING.ALL: fixed counter 6 (Atom only) */
+
 static inline bool use_fixed_pseudo_encoding(u64 code)
 {
 	return !(code & 0xff);
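A quick standalone illustration of the pseudo-encoding check touched in the
hunk above (this is an editorial sketch, not part of the patch; the constants
are taken from the constraint tables in this series):

#include <stdio.h>

/*
 * Mirror of use_fixed_pseudo_encoding() from the perf_event.h hunk:
 * a code is a pseudo encoding iff the event-select byte is zero.
 */
static int use_fixed_pseudo_encoding(unsigned long long code)
{
	return !(code & 0xff);
}

int main(void)
{
	/* 0x0300: pseudo CPU_CLK_UNHALTED.REF -> pseudo encoding */
	printf("0x0300 -> %d\n", use_fixed_pseudo_encoding(0x0300)); /* 1 */
	/* 0x013c: CPU_CLK_UNHALTED.REF_TSC_P -> real event 0x3c */
	printf("0x013c -> %d\n", use_fixed_pseudo_encoding(0x013c)); /* 0 */
	/* 0x02c2: TOPDOWN_RETIRING.ALL -> real event 0xc2 */
	printf("0x02c2 -> %d\n", use_fixed_pseudo_encoding(0x02c2)); /* 0 */
	return 0;
}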
--
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V3 04/13] perf/x86/intel: Rename model-specific pebs_latency_data functions
Date: Wed, 26 Jun 2024 07:35:36 -0700
Message-Id: <20240626143545.480761-5-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>

From: Kan Liang

The model-specific pebs_latency_data functions of ADL and MTL use "small"
as a postfix to indicate the e-core. The postfix is too generic for a
model-specific function: it does not directly map to a specific uarch,
which would facilitate development and maintenance.

Use the abbreviation of the uarch to rename the model-specific functions.
Suggested-by: Peter Zijlstra (Intel)
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
---
 arch/x86/events/intel/core.c |  8 ++++----
 arch/x86/events/intel/ds.c   | 20 ++++++++++----------
 arch/x86/events/perf_event.h |  4 ++--
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 73dbbbdcc421..50033023125d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6509,7 +6509,7 @@ __init int intel_pmu_init(void)
 	case INTEL_ATOM_GRACEMONT:
 		intel_pmu_init_grt(NULL);
 		intel_pmu_pebs_data_source_grt();
-		x86_pmu.pebs_latency_data = adl_latency_data_small;
+		x86_pmu.pebs_latency_data = adl_latency_data_grt;
 		x86_pmu.get_event_constraints = tnt_get_event_constraints;
 		td_attr = tnt_events_attrs;
 		mem_attr = grt_mem_attrs;
@@ -6523,7 +6523,7 @@ __init int intel_pmu_init(void)
 		intel_pmu_init_grt(NULL);
 		x86_pmu.extra_regs = intel_cmt_extra_regs;
 		intel_pmu_pebs_data_source_cmt();
-		x86_pmu.pebs_latency_data = mtl_latency_data_small;
+		x86_pmu.pebs_latency_data = mtl_latency_data_cmt;
 		x86_pmu.get_event_constraints = cmt_get_event_constraints;
 		td_attr = cmt_events_attrs;
 		mem_attr = grt_mem_attrs;
@@ -6874,7 +6874,7 @@ __init int intel_pmu_init(void)
 		 */
 		intel_pmu_init_hybrid(hybrid_big_small);
 
-		x86_pmu.pebs_latency_data = adl_latency_data_small;
+		x86_pmu.pebs_latency_data = adl_latency_data_grt;
 		x86_pmu.get_event_constraints = adl_get_event_constraints;
 		x86_pmu.hw_config = adl_hw_config;
 		x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type;
@@ -6931,7 +6931,7 @@ __init int intel_pmu_init(void)
 	case INTEL_METEORLAKE_L:
 		intel_pmu_init_hybrid(hybrid_big_small);
 
-		x86_pmu.pebs_latency_data = mtl_latency_data_small;
+		x86_pmu.pebs_latency_data = mtl_latency_data_cmt;
 		x86_pmu.get_event_constraints = mtl_get_event_constraints;
 		x86_pmu.hw_config = adl_hw_config;
 
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 79e23dec6714..8a11f72a22b6 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -257,8 +257,8 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
 }
 
 /* Retrieve the latency data for e-core of ADL */
-static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
-				    u8 dse, bool tlb, bool lock, bool blk)
+static u64 __adl_latency_data_grt(struct perf_event *event, u64 status,
+				  u8 dse, bool tlb, bool lock, bool blk)
 {
 	u64 val;
 
@@ -277,27 +277,27 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
 	return val;
 }
 
-u64 adl_latency_data_small(struct perf_event *event, u64 status)
+u64 adl_latency_data_grt(struct perf_event *event, u64 status)
 {
 	union intel_x86_pebs_dse dse;
 
 	dse.val = status;
 
-	return __adl_latency_data_small(event, status, dse.ld_dse,
-					dse.ld_locked, dse.ld_stlb_miss,
-					dse.ld_data_blk);
+	return __adl_latency_data_grt(event, status, dse.ld_dse,
+				      dse.ld_locked, dse.ld_stlb_miss,
+				      dse.ld_data_blk);
 }
 
 /* Retrieve the latency data for e-core of MTL */
-u64 mtl_latency_data_small(struct perf_event *event, u64 status)
+u64 mtl_latency_data_cmt(struct perf_event *event, u64 status)
 {
 	union intel_x86_pebs_dse dse;
 
 	dse.val = status;
 
-	return __adl_latency_data_small(event, status, dse.mtl_dse,
-					dse.mtl_stlb_miss, dse.mtl_locked,
-					dse.mtl_fwd_blk);
+	return __adl_latency_data_grt(event, status, dse.mtl_dse,
+				      dse.mtl_stlb_miss, dse.mtl_locked,
+				      dse.mtl_fwd_blk);
 }
 
 static u64 load_latency_data(struct perf_event *event, u64 status)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 3c781dabce76..e9374b4360d4 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1548,9 +1548,9 @@ void intel_pmu_disable_bts(void);
 
 int intel_pmu_drain_bts_buffer(void);
 
-u64 adl_latency_data_small(struct perf_event *event, u64 status);
+u64 adl_latency_data_grt(struct perf_event *event, u64 status);
 
-u64 mtl_latency_data_small(struct perf_event *event, u64 status);
+u64 mtl_latency_data_cmt(struct perf_event *event, u64 status);
 
 extern struct event_constraint intel_core2_pebs_event_constraints[];
 
--
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V3 05/13] perf/x86/intel: Support new data source for Lunar Lake
Date: Wed, 26 Jun 2024 07:35:37 -0700
Message-Id: <20240626143545.480761-6-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>

From: Kan Liang

A new PEBS data source format is introduced for the p-core of Lunar
Lake. The data source field is extended to 8 bits with new encodings. A
new layout is introduced into the union intel_x86_pebs_dse.

Introduce lnl_latency_data() to parse the new format. Enlarge
pebs_data_source[] accordingly to include the new encodings.

Only the mem load and the mem store events can generate the data
source. Introduce INTEL_HYBRID_LDLAT_CONSTRAINT and
INTEL_HYBRID_STLAT_CONSTRAINT to mark them.

Add two new bits for the new cache-related data sources, L2_MHB and
MSC. L2_MHB is short for the L2 Miss Handling Buffer, which is similar
to the LFB (Line Fill Buffer) but tracks L2 cache misses. MSC stands
for the memory-side cache.

Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
---
 arch/x86/events/intel/core.c    |  2 +
 arch/x86/events/intel/ds.c      | 88 ++++++++++++++++++++++++++++++-
 arch/x86/events/perf_event.h    | 16 +++++-
 include/uapi/linux/perf_event.h |  6 ++-
 4 files changed, 107 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 50033023125d..3bf3d6b619ed 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6960,6 +6960,7 @@ __init int intel_pmu_init(void)
 	case INTEL_ARROWLAKE:
 		intel_pmu_init_hybrid(hybrid_big_small);
 
+		x86_pmu.pebs_latency_data = lnl_latency_data;
 		x86_pmu.get_event_constraints = mtl_get_event_constraints;
 		x86_pmu.hw_config = adl_hw_config;
 
@@ -6977,6 +6978,7 @@ __init int intel_pmu_init(void)
 		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
 		intel_pmu_init_skt(&pmu->pmu);
 
+		intel_pmu_pebs_data_source_lnl();
 		pr_cont("Lunarlake Hybrid events, ");
 		name = "lunarlake_hybrid";
 		break;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8a11f72a22b6..ce7e98409f29 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -63,6 +63,15 @@ union intel_x86_pebs_dse {
 		unsigned int mtl_fwd_blk:1;
 		unsigned int ld_reserved4:24;
 	};
+	struct {
+		unsigned int lnc_dse:8;
+		unsigned int ld_reserved5:2;
+		unsigned int lnc_stlb_miss:1;
+		unsigned int lnc_locked:1;
+		unsigned int lnc_data_blk:1;
+		unsigned int lnc_addr_blk:1;
+		unsigned int ld_reserved6:18;
+	};
 };
 
 
@@ -77,7 +86,7 @@ union intel_x86_pebs_dse {
 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
 
 /* Version for Sandy Bridge and later */
-static u64 pebs_data_source[] = {
+static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
 	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
 	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
 	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
@@ -173,6 +182,40 @@ void __init intel_pmu_pebs_data_source_cmt(void)
 	__intel_pmu_pebs_data_source_cmt(pebs_data_source);
 }
 
+/* Version for Lion Cove and later */
+static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
+	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),	/* 0x00: ukn L3 */
+	OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE),	/* 0x01: L1 hit */
+	OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE),	/* 0x02: L1 hit */
+	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),	/* 0x03: LFB/L1 Miss Handling Buffer hit */
+	0,							/* 0x04: Reserved */
+	OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE),	/* 0x05: L2 Hit */
+	OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE),			/* 0x06: L2 Miss Handling Buffer Hit */
+	0,							/* 0x07: Reserved */
+	OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE),	/* 0x08: L3 Hit */
+	0,							/* 0x09: Reserved */
+	0,							/* 0x0a: Reserved */
+	0,							/* 0x0b: Reserved */
+	OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD),	/* 0x0c: L3 Hit Snoop Fwd */
+	OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM),	/* 0x0d: L3 Hit Snoop HitM */
+	0,							/* 0x0e: Reserved */
+	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM),	/* 0x0f: L3 Miss Snoop HitM */
+	OP_LH | LEVEL(MSC) | P(SNOOP, NONE),			/* 0x10: Memory-side Cache Hit */
+	OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE),	/* 0x11: Local Memory Hit */
+};
+
+void __init intel_pmu_pebs_data_source_lnl(void)
+{
+	u64 *data_source;
+
+	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
+	memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));
+
+	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
+	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+	__intel_pmu_pebs_data_source_cmt(data_source);
+}
+
 static u64 precise_store_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -264,7 +307,7 @@ static u64 __adl_latency_data_grt(struct perf_event *event, u64 status,
 
 	WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
 
-	dse &= PERF_PEBS_DATA_SOURCE_MASK;
+	dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
 	val = hybrid_var(event->pmu, pebs_data_source)[dse];
 
 	pebs_set_tlb_lock(&val, tlb, lock);
@@ -300,6 +343,45 @@ u64 mtl_latency_data_cmt(struct perf_event *event, u64 status)
 				      dse.mtl_fwd_blk);
 }
 
+u64 lnl_latency_data(struct perf_event *event, u64 status)
+{
+	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+	union intel_x86_pebs_dse dse;
+	union perf_mem_data_src src;
+	u64 val;
+
+	if (pmu->pmu_type == hybrid_small)
+		return mtl_latency_data_cmt(event, status);
+
+	dse.val = status;
+
+	/* LNC core latency data */
+	val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
+	if (!val)
+		val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
+
+	if (dse.lnc_stlb_miss)
+		val |= P(TLB, MISS) | P(TLB, L2);
+	else
+		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+	if (dse.lnc_locked)
+		val |= P(LOCK, LOCKED);
+
+	if (dse.lnc_data_blk)
+		val |= P(BLK, DATA);
+	if (dse.lnc_addr_blk)
+		val |= P(BLK, ADDR);
+	if (!dse.lnc_data_blk && !dse.lnc_addr_blk)
+		val |= P(BLK, NA);
+
+	src.val = val;
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+		src.mem_op = P(OP, STORE);
+
+	return src.val;
+}
+
 static u64 load_latency_data(struct perf_event *event, u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -1090,6 +1172,8 @@ struct event_constraint intel_lnc_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
 
+	INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff),
+	INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
 	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index e9374b4360d4..0d333bb9c8f4 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -476,6 +476,14 @@ struct cpu_hw_events {
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)
 
+#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW)
+
+#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW)
+
 /* Event constraint, but match on all event flags too. */
 #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
 	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -655,8 +663,10 @@ enum {
 	x86_lbr_exclusive_max,
 };
 
-#define PERF_PEBS_DATA_SOURCE_MAX	0x10
+#define PERF_PEBS_DATA_SOURCE_MAX	0x100
 #define PERF_PEBS_DATA_SOURCE_MASK	(PERF_PEBS_DATA_SOURCE_MAX - 1)
+#define PERF_PEBS_DATA_SOURCE_GRT_MAX	0x10
+#define PERF_PEBS_DATA_SOURCE_GRT_MASK	(PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
 
 enum hybrid_cpu_type {
 	HYBRID_INTEL_NONE,
@@ -1552,6 +1562,8 @@ u64 adl_latency_data_grt(struct perf_event *event, u64 status);
 
 u64 mtl_latency_data_cmt(struct perf_event *event, u64 status);
 
+u64 lnl_latency_data(struct perf_event *event, u64 status);
+
 extern struct event_constraint intel_core2_pebs_event_constraints[];
 
 extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1673,6 +1685,8 @@ void intel_pmu_pebs_data_source_mtl(void);
 
 void intel_pmu_pebs_data_source_cmt(void);
 
+void intel_pmu_pebs_data_source_lnl(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 3a64499b0f5d..4842c36fdf80 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1349,12 +1349,14 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
-/* 5-0x7 available */
+#define PERF_MEM_LVLNUM_L2_MHB	0x05 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC	0x06 /* Memory-side Cache */
+/* 0x7 available */
 #define PERF_MEM_LVLNUM_UNC	0x08 /* Uncached */
 #define PERF_MEM_LVLNUM_CXL	0x09 /* CXL */
 #define PERF_MEM_LVLNUM_IO	0x0a /* I/O */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
+#define PERF_MEM_LVLNUM_LFB	0x0c /* LFB / L1 Miss Handling Buffer */
 #define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
 #define PERF_MEM_LVLNUM_PMEM	0x0e /* PMEM */
 #define PERF_MEM_LVLNUM_NA	0x0f /* N/A */
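For tool writers, a minimal user-space sketch (editorial, not part of the
patch) of mapping the extended mem_lvl_num encodings, including the two
values added here, to names. It assumes a uapi perf_event.h that contains
this hunk; the fallback defines cover older headers:

#include <linux/perf_event.h>
#include <stdio.h>

#ifndef PERF_MEM_LVLNUM_L2_MHB
#define PERF_MEM_LVLNUM_L2_MHB	0x05	/* value from the hunk above */
#define PERF_MEM_LVLNUM_MSC	0x06	/* value from the hunk above */
#endif

/* Map a mem_lvl_num value from a PERF_SAMPLE_DATA_SRC sample to a name. */
static const char *lvlnum_name(unsigned int lvlnum)
{
	switch (lvlnum) {
	case PERF_MEM_LVLNUM_L1:     return "L1";
	case PERF_MEM_LVLNUM_L2:     return "L2";
	case PERF_MEM_LVLNUM_L2_MHB: return "L2 Miss Handling Buffer";
	case PERF_MEM_LVLNUM_MSC:    return "Memory-side Cache";
	case PERF_MEM_LVLNUM_L3:     return "L3";
	case PERF_MEM_LVLNUM_LFB:    return "LFB / L1 Miss Handling Buffer";
	case PERF_MEM_LVLNUM_RAM:    return "RAM";
	default:                     return "other/NA";
	}
}

int main(void)
{
	union perf_mem_data_src src = { .val = 0 };

	src.mem_lvl_num = PERF_MEM_LVLNUM_L2_MHB;
	printf("%s\n", lvlnum_name(src.mem_lvl_num));
	return 0;
}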
--
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang, Dapeng Mi
Subject: [PATCH V3 06/13] perf/x86: Add config_mask to represent EVENTSEL bitmask
Date: Wed, 26 Jun 2024 07:35:38 -0700
Message-Id: <20240626143545.480761-7-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>
From: Kan Liang

Different vendors may support different fields in the EVENTSEL MSR. For
example, Intel introduces two new fields, umask2 and eq, in the EVENTSEL
MSR starting with Perfmon version 6. However, a fixed mask,
X86_RAW_EVENT_MASK, is currently used to filter attr.config.

Introduce a new config_mask to record the actually supported EVENTSEL
bitmask. Only apply it to the existing code for now. No functional
change.

Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Co-developed-by: Dapeng Mi
Signed-off-by: Dapeng Mi
Signed-off-by: Kan Liang
---
 arch/x86/events/core.c       | 5 ++++-
 arch/x86/events/intel/core.c | 1 +
 arch/x86/events/perf_event.h | 7 +++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 0c51cfdf7609..842dbf1d706c 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -624,7 +624,7 @@ int x86_pmu_hw_config(struct perf_event *event)
 		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 
 	if (event->attr.type == event->pmu->type)
-		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+		event->hw.config |= x86_pmu_get_event_config(event);
 
 	if (event->attr.sample_period && x86_pmu.limit_period) {
 		s64 left = event->attr.sample_period;
@@ -2098,6 +2098,9 @@ static int __init init_hw_perf_events(void)
 	if (!x86_pmu.intel_ctrl)
 		x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
 
+	if (!x86_pmu.config_mask)
+		x86_pmu.config_mask = X86_RAW_EVENT_MASK;
+
 	perf_events_lapic_init();
 	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
 
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 3bf3d6b619ed..12b78febcd09 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6144,6 +6144,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
 		pmu->cntr_mask64 = x86_pmu.cntr_mask64;
 		pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
 		pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
+		pmu->config_mask = X86_RAW_EVENT_MASK;
 		pmu->unconstrained = (struct event_constraint)
 				     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
 							0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 0d333bb9c8f4..a226565a9333 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -695,6 +695,7 @@ struct x86_hybrid_pmu {
 	union perf_capabilities	intel_cap;
 	u64			intel_ctrl;
 	u64			pebs_events_mask;
+	u64			config_mask;
 	union {
 			u64		cntr_mask64;
 			unsigned long	cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -790,6 +791,7 @@ struct x86_pmu {
 	int		(*rdpmc_index)(int index);
 	u64		(*event_map)(int);
 	int		max_events;
+	u64		config_mask;
 	union {
 			u64		cntr_mask64;
 			unsigned long	cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -1241,6 +1243,11 @@ static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu)
 	return find_last_bit(hybrid(pmu, fixed_cntr_mask), INTEL_PMC_MAX_FIXED) + 1;
 }
 
+static inline u64 x86_pmu_get_event_config(struct perf_event *event)
+{
+	return event->attr.config & hybrid(event->pmu, config_mask);
+}
+
 extern struct event_constraint emptyconstraint;
 
 extern struct event_constraint unconstrained;
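To illustrate the effect of the filtering, a standalone sketch with made-up
values (editorial, not the kernel code; the mask constant below is a
stand-in, not the real X86_RAW_EVENT_MASK): attr.config bits outside the
mask are silently dropped, so a per-PMU config_mask is what later lets
extended bits through.

#include <stdio.h>

/* Stand-in for the legacy fixed mask; covers bits 0-33 in this sketch. */
#define LEGACY_EVENT_MASK	0x3ffffffffULL

int main(void)
{
	/* Hypothetical attr.config with an extended bit (bit 40) set. */
	unsigned long long config = (1ULL << 40) | 0x10c0;

	/* Legacy behavior: the extended bit is dropped. */
	printf("legacy:   %#llx\n", config & LEGACY_EVENT_MASK);

	/* With a config_mask that also covers bits 40-47, it survives. */
	unsigned long long config_mask = LEGACY_EVENT_MASK | (0xffULL << 40);
	printf("extended: %#llx\n", config & config_mask);
	return 0;
}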
--
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang, Dapeng Mi
Subject: [PATCH V3 07/13] perf/x86/intel: Support PERFEVTSEL extension
Date: Wed, 26 Jun 2024 07:35:39 -0700
Message-Id: <20240626143545.480761-8-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>
From: Kan Liang

Two new fields, the unit mask2 and the equal flag, are added to the
IA32_PERFEVTSELx MSRs. They can be enumerated by CPUID.23H.0.EBX.

Update the config_mask in x86_pmu and x86_hybrid_pmu to reflect the true
layout of PERFEVTSEL. Expose the new formats via sysfs if they are
available. The umask extension reuses the format attr name "umask" of
the existing umask. Add umask2_show to determine and display the correct
format for the current machine.

Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Co-developed-by: Dapeng Mi
Signed-off-by: Dapeng Mi
Signed-off-by: Kan Liang
---
 arch/x86/events/intel/core.c      | 69 +++++++++++++++++++++++++++++--
 arch/x86/include/asm/perf_event.h |  4 ++
 2 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 12b78febcd09..9dac918316a6 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4632,8 +4632,55 @@ PMU_FORMAT_ATTR(pc,	"config:19"	);
 PMU_FORMAT_ATTR(any,	"config:21"	); /* v3 + */
 PMU_FORMAT_ATTR(inv,	"config:23"	);
 PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
-PMU_FORMAT_ATTR(in_tx,  "config:32");
-PMU_FORMAT_ATTR(in_tx_cp, "config:33");
+PMU_FORMAT_ATTR(in_tx,	"config:32"	);
+PMU_FORMAT_ATTR(in_tx_cp, "config:33"	);
+PMU_FORMAT_ATTR(eq,	"config:36"	); /* v6 + */
+
+static ssize_t umask2_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *page)
+{
+	u64 mask = hybrid(dev_get_drvdata(dev), config_mask) & ARCH_PERFMON_EVENTSEL_UMASK2;
+
+	if (mask == ARCH_PERFMON_EVENTSEL_UMASK2)
+		return sprintf(page, "config:8-15,40-47\n");
+
+	/* Roll back to the old format if umask2 is not supported. */
+	return sprintf(page, "config:8-15\n");
+}
+
+static struct device_attribute format_attr_umask2  =
+		__ATTR(umask, 0444, umask2_show, NULL);
+
+static struct attribute *format_evtsel_ext_attrs[] = {
+	&format_attr_umask2.attr,
+	&format_attr_eq.attr,
+	NULL
+};
+
+static umode_t
+evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	u64 mask;
+
+	/*
+	 * The umask and umask2 have different formats but share the
+	 * same attr name. In update mode, the previous value of the
+	 * umask is unconditionally removed before is_visible. If the
+	 * umask2 format is not enumerated, it is impossible to roll
+	 * back to the old format.
+	 * Do the check in umask2_show rather than in is_visible.
+	 */
+	if (i == 0)
+		return attr->mode;
+
+	mask = hybrid(dev_get_drvdata(dev), config_mask);
+	if (i == 1)
+		return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;
+
+	return 0;
+}
 
 static struct attribute *intel_arch_formats_attr[] = {
 	&format_attr_event.attr,
@@ -4786,8 +4833,14 @@ static inline bool intel_pmu_broken_perf_cap(void)
 
 static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
 {
-	unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF);
-	unsigned int eax, ebx, ecx, edx;
+	unsigned int sub_bitmaps, eax, ebx, ecx, edx;
+
+	cpuid(ARCH_PERFMON_EXT_LEAF, &sub_bitmaps, &ebx, &ecx, &edx);
+
+	if (ebx & ARCH_PERFMON_EXT_UMASK2)
+		pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2;
+	if (ebx & ARCH_PERFMON_EXT_EQ)
+		pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ;
 
 	if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
 		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
@@ -5810,6 +5863,12 @@ static struct attribute_group group_format_extra_skl = {
 	.is_visible = exra_is_visible,
 };
 
+static struct attribute_group group_format_evtsel_ext = {
+	.name       = "format",
+	.attrs      = format_evtsel_ext_attrs,
+	.is_visible = evtsel_ext_is_visible,
+};
+
 static struct attribute_group group_default = {
 	.attrs = intel_pmu_attrs,
 	.is_visible = default_is_visible,
@@ -5823,6 +5882,7 @@ static const struct attribute_group *attr_update[] = {
 	&group_caps_lbr,
 	&group_format_extra,
 	&group_format_extra_skl,
+	&group_format_evtsel_ext,
 	&group_default,
 	NULL,
 };
@@ -6042,6 +6102,7 @@ static const struct attribute_group *hybrid_attr_update[] = {
 	&group_caps_gen,
 	&group_caps_lbr,
 	&hybrid_group_format_extra,
+	&group_format_evtsel_ext,
 	&group_default,
 	&hybrid_group_cpus,
 	NULL,
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 400c909b8658..91b73571412f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -32,6 +32,8 @@
 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
 #define ARCH_PERFMON_EVENTSEL_BR_CNTR			(1ULL << 35)
+#define ARCH_PERFMON_EVENTSEL_EQ			(1ULL << 36)
+#define ARCH_PERFMON_EVENTSEL_UMASK2			(0xFFULL << 40)
 
 #define INTEL_FIXED_BITS_MASK				0xFULL
 #define INTEL_FIXED_BITS_STRIDE				4
@@ -185,6 +187,8 @@ union cpuid10_edx {
  * detection/enumeration details:
  */
 #define ARCH_PERFMON_EXT_LEAF			0x00000023
+#define ARCH_PERFMON_EXT_UMASK2			0x1
+#define ARCH_PERFMON_EXT_EQ			0x2
 #define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT	0x1
 #define ARCH_PERFMON_NUM_COUNTER_LEAF		0x1
 
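A standalone sketch (editorial, not part of the patch) of packing a raw
config value against the extended format exposed above: umask2 occupies
config:40-47 and eq is config:36, per the format strings in the diff. The
event and umask numbers below are placeholders, not real events:

#include <stdint.h>
#include <stdio.h>

/* Pack event/umask/umask2/eq into a raw attr.config value. */
static uint64_t pack_config(uint8_t event, uint8_t umask,
			    uint8_t umask2, int eq)
{
	uint64_t config = 0;

	config |= (uint64_t)event;		/* config:0-7 */
	config |= (uint64_t)umask << 8;		/* config:8-15 */
	config |= (uint64_t)umask2 << 40;	/* config:40-47 */
	if (eq)
		config |= 1ULL << 36;		/* config:36 (eq) */
	return config;
}

int main(void)
{
	/* Placeholder event 0xd0, umask 0x21, umask2 0x01, eq set. */
	printf("%#llx\n",
	       (unsigned long long)pack_config(0xd0, 0x21, 0x01, 1));
	return 0;
}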
--
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V3 08/13] perf/x86/intel: Support Perfmon MSRs aliasing
Date: Wed, 26 Jun 2024 07:35:40 -0700
Message-Id: <20240626143545.480761-9-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>

From: Kan Liang

Architectural performance monitoring V6 supports a new range of counter
MSRs in the 19xxH address range. It includes all the GP counter MSRs,
the GP control MSRs, and the fixed counter MSRs. The step between
sibling counters is 4. Add intel_pmu_addr_offset() to calculate the
correct offset.

Add fixedctr to struct x86_pmu to store the address of fixed counter 0.
It can be used to calculate the rest of the fixed counters.

The MSR address of the fixed counter control is not changed.
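Concretely, the aliasing arithmetic works out as in this standalone sketch
(editorial, not part of the patch; the constants match the msr-index.h hunk
in the diff below):

#include <stdio.h>

#define MSR_IA32_PMC_V6_GP0_CTR		0x1900	/* GP counter 0 */
#define MSR_IA32_PMC_V6_GP0_CFG_A	0x1901	/* GP control 0 */
#define MSR_IA32_PMC_V6_FX0_CTR		0x1980	/* fixed counter 0 */
#define MSR_IA32_PMC_V6_STEP		4	/* stride between siblings */

int main(void)
{
	/* Counter i lives at base + 4 * i in the 19xxH range. */
	for (int i = 0; i < 3; i++)
		printf("GP%d: ctr %#x cfg %#x  FX%d: ctr %#x\n",
		       i, MSR_IA32_PMC_V6_GP0_CTR + MSR_IA32_PMC_V6_STEP * i,
		       MSR_IA32_PMC_V6_GP0_CFG_A + MSR_IA32_PMC_V6_STEP * i,
		       i, MSR_IA32_PMC_V6_FX0_CTR + MSR_IA32_PMC_V6_STEP * i);
	return 0;
}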
Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
---
 arch/x86/events/core.c           |  7 +++----
 arch/x86/events/intel/core.c     | 17 ++++++++++++++++-
 arch/x86/events/perf_event.h     |  7 +++++++
 arch/x86/include/asm/msr-index.h |  6 ++++++
 4 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 842dbf1d706c..12f2a0c14d33 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1236,8 +1236,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 		fallthrough;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
-				  (idx - INTEL_PMC_IDX_FIXED);
+		hwc->event_base = x86_pmu_fixed_ctr_addr(idx - INTEL_PMC_IDX_FIXED);
 		hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
 					INTEL_PMC_FIXED_RDPMC_BASE;
 		break;
@@ -1573,7 +1572,7 @@ void perf_event_print_debug(void)
 		for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
 			if (fixed_counter_disabled(idx, cpuc->pmu))
 				continue;
-			rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+			rdmsrl(x86_pmu_fixed_ctr_addr(idx), pmc_count);
 
 			pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
 				cpu, idx, pmc_count);
@@ -2483,7 +2482,7 @@ void perf_clear_dirty_counters(void)
 			if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
 				continue;
 
-			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
+			wrmsrl(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0);
 		} else {
 			wrmsrl(x86_pmu_event_addr(i), 0);
 		}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9dac918316a6..3cc0f45b9b45 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2953,7 +2953,7 @@ static void intel_pmu_reset(void)
 	for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
 		if (fixed_counter_disabled(idx, cpuc->pmu))
 			continue;
-		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+		wrmsrl_safe(x86_pmu_fixed_ctr_addr(idx), 0ull);
 	}
 
 	if (ds)
@@ -5188,6 +5188,7 @@ static __initconst const struct x86_pmu core_pmu = {
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.fixedctr		= MSR_ARCH_PERFMON_FIXED_CTR0,
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
@@ -5241,6 +5242,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.fixedctr		= MSR_ARCH_PERFMON_FIXED_CTR0,
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
@@ -6176,6 +6178,11 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs)
 	}
 }
 
+static inline int intel_pmu_addr_offset(int index, bool eventsel)
+{
+	return MSR_IA32_PMC_V6_STEP * index;
+}
+
 static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
 	{ hybrid_small, "cpu_atom" },
 	{ hybrid_big, "cpu_core" },
@@ -7150,6 +7157,14 @@ __init int intel_pmu_init(void)
 			pr_cont("full-width counters, ");
 	}
 
+	/* Support V6+ MSR Aliasing */
+	if (x86_pmu.version >= 6) {
+		x86_pmu.perfctr = MSR_IA32_PMC_V6_GP0_CTR;
+		x86_pmu.eventsel = MSR_IA32_PMC_V6_GP0_CFG_A;
+		x86_pmu.fixedctr = MSR_IA32_PMC_V6_FX0_CTR;
+		x86_pmu.addr_offset = intel_pmu_addr_offset;
+	}
+
 	if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics)
 		x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a226565a9333..8e3f2644a1a3 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -787,6 +787,7 @@ struct x86_pmu {
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
+	unsigned	fixedctr;
 	int		(*addr_offset)(int index, bool eventsel);
 	int		(*rdpmc_index)(int index);
 	u64		(*event_map)(int);
@@ -1144,6 +1145,12 @@ static inline unsigned int x86_pmu_event_addr(int index)
 				  x86_pmu.addr_offset(index, false) : index);
 }
 
+static inline unsigned int x86_pmu_fixed_ctr_addr(int index)
+{
+	return x86_pmu.fixedctr + (x86_pmu.addr_offset ?
+				   x86_pmu.addr_offset(index, false) : index);
+}
+
 static inline int x86_pmu_rdpmc_index(int index)
 {
 	return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 45ffa27569f4..7eac8f5cea3b 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -566,6 +566,12 @@
 #define MSR_RELOAD_PMC0			0x000014c1
 #define MSR_RELOAD_FIXED_CTR0		0x00001309
 
+/* V6 PMON MSR range */
+#define MSR_IA32_PMC_V6_GP0_CTR		0x1900
+#define MSR_IA32_PMC_V6_GP0_CFG_A	0x1901
+#define MSR_IA32_PMC_V6_FX0_CTR		0x1980
+#define MSR_IA32_PMC_V6_STEP		4
+
 /* KeyID partitioning between MKTME and TDX */
 #define MSR_IA32_MKTME_KEYID_PARTITIONING	0x00000087
 
--
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang, Sandipan Das, Ravi Bangoria, silviazhao
Subject: [PATCH V3 09/13] perf/x86: Extend event update interface
Date: Wed, 26 Jun 2024 07:35:41 -0700
Message-Id: <20240626143545.480761-10-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>

From: Kan Liang

The current event update interface directly reads the values from the
counter, but those values may not be the accurate ones users require.
For example, the sample read feature wants the counter values of the
member events at the moment the leader event overflows. With the
current implementation, the read (event update) actually happens in the
NMI handler, so there may be a small gap between the overflow and the
NMI handler. The new Intel PEBS counters snapshotting feature can
provide the accurate counter value at the overflow.

The event update interface has to be extended to apply given accurate
values: pass them in via the interface, and if no value is available,
still read the counter directly.

Use u64 * rather than u64 as the new parameter, because 0 might be a
valid rdpmc() value, so !val cannot distinguish between an argument
being present and absent. Also, in some cases, e.g.,
intel_update_topdown_event, more than one counter/register is read.
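The calling convention can be sketched in isolation like this (editorial,
simplified stand-in, not the kernel implementation): passing NULL keeps
the old read-the-hardware behavior, while a non-NULL pointer supplies the
snapshotted value.

#include <stdint.h>
#include <stdio.h>

static uint64_t read_hw_counter(void)
{
	return 12345;	/* stand-in for rdpmc() */
}

/* NULL means "read the hardware"; non-NULL supplies a captured value. */
static uint64_t event_update(const uint64_t *val)
{
	uint64_t new_raw_count = val ? *val : read_hw_counter();

	/* ... the delta computation against prev_count would follow ... */
	return new_raw_count;
}

int main(void)
{
	uint64_t snapshot = 67890;	/* e.g., taken from a PEBS record */

	printf("%llu\n", (unsigned long long)event_update(NULL));
	printf("%llu\n", (unsigned long long)event_update(&snapshot));
	return 0;
}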
Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
Cc: Sandipan Das
Cc: Ravi Bangoria
Cc: silviazhao
---
 arch/x86/events/amd/core.c     |  2 +-
 arch/x86/events/core.c         | 13 ++++++-----
 arch/x86/events/intel/core.c   | 40 +++++++++++++++++++---------------
 arch/x86/events/intel/p4.c     |  2 +-
 arch/x86/events/perf_event.h   |  4 ++--
 arch/x86/events/zhaoxin/core.c |  2 +-
 6 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 920e3a640cad..284bf6157545 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -986,7 +986,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 
 		event = cpuc->events[idx];
 		hwc = &event->hw;
-		x86_perf_event_update(event);
+		x86_perf_event_update(event, NULL);
 		mask = BIT_ULL(idx);
 
 		if (!(status & mask))
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 12f2a0c14d33..07a56bf71160 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -112,7 +112,7 @@ u64 __read_mostly hw_cache_extra_regs
 * Can only be executed on the CPU where the event is active.
 * Returns the delta events processed.
 */
-u64 x86_perf_event_update(struct perf_event *event)
+u64 x86_perf_event_update(struct perf_event *event, u64 *val)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int shift = 64 - x86_pmu.cntval_bits;
@@ -131,7 +131,10 @@ u64 x86_perf_event_update(struct perf_event *event)
 	 */
 	prev_raw_count = local64_read(&hwc->prev_count);
 	do {
-		rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+		if (!val)
+			rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+		else
+			new_raw_count = *val;
 	} while (!local64_try_cmpxchg(&hwc->prev_count,
 				      &prev_raw_count, new_raw_count));
 
@@ -1598,7 +1601,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
 		 * Drain the remaining delta count out of a event
 		 * that we are disabling:
 		 */
-		static_call(x86_pmu_update)(event);
+		static_call(x86_pmu_update)(event, NULL);
 		hwc->state |= PERF_HES_UPTODATE;
 	}
 }
@@ -1689,7 +1692,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 
 		event = cpuc->events[idx];
 
-		val = static_call(x86_pmu_update)(event);
+		val = static_call(x86_pmu_update)(event, NULL);
 		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 			continue;
 
@@ -2036,7 +2039,7 @@ static void x86_pmu_static_call_update(void)
 
 static void _x86_pmu_read(struct perf_event *event)
 {
-	static_call(x86_pmu_update)(event);
+	static_call(x86_pmu_update)(event, NULL);
 }
 
 void x86_pmu_show_pmu_cap(struct pmu *pmu)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 3cc0f45b9b45..5cd9be066326 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2418,7 +2418,7 @@ static void intel_pmu_nhm_workaround(void)
 	for (i = 0; i < 4; i++) {
 		event = cpuc->events[i];
 		if (event)
-			static_call(x86_pmu_update)(event);
+			static_call(x86_pmu_update)(event, NULL);
 	}
 
 	for (i = 0; i < 4; i++) {
@@ -2710,7 +2710,7 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
 * modify by a NMI. PMU has to be disabled before calling this function.
 */
 
-static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
+static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct perf_event *other;
@@ -2718,13 +2718,18 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
 	bool reset = true;
 	int idx;
 
-	/* read Fixed counter 3 */
-	rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
-	if (!slots)
-		return 0;
+	if (!val) {
+		/* read Fixed counter 3 */
+		rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
+		if (!slots)
+			return 0;
 
-	/* read PERF_METRICS */
-	rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+		/* read PERF_METRICS */
+		rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+	} else {
+		slots = val[0];
+		metrics = val[1];
+	}
 
 	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
 		if (!is_topdown_idx(idx))
@@ -2767,10 +2772,11 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
 	return slots;
 }
 
-static u64 icl_update_topdown_event(struct perf_event *event)
+static u64 icl_update_topdown_event(struct perf_event *event, u64 *val)
 {
 	return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
-					  x86_pmu.num_topdown_events - 1);
+					  x86_pmu.num_topdown_events - 1,
+					  val);
 }
 
 DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
@@ -2785,7 +2791,7 @@ static void intel_pmu_read_topdown_event(struct perf_event *event)
 		return;
 
 	perf_pmu_disable(event->pmu);
-	static_call(intel_pmu_update_topdown_event)(event);
+	static_call(intel_pmu_update_topdown_event)(event, NULL);
 	perf_pmu_enable(event->pmu);
 }
 
@@ -2796,7 +2802,7 @@ static void intel_pmu_read_event(struct perf_event *event)
 	else if (is_topdown_count(event))
 		intel_pmu_read_topdown_event(event);
 	else
-		x86_perf_event_update(event);
+		x86_perf_event_update(event, NULL);
 }
 
 static void intel_pmu_enable_fixed(struct perf_event *event)
@@ -2899,7 +2905,7 @@ static void intel_pmu_add_event(struct perf_event *event)
 */
 int intel_pmu_save_and_restart(struct perf_event *event)
 {
-	static_call(x86_pmu_update)(event);
+	static_call(x86_pmu_update)(event, NULL);
 	/*
 	 * For a checkpointed counter always reset back to 0.  This
 	 * avoids a situation where the counter overflows, aborts the
@@ -2922,12 +2928,12 @@ static int intel_pmu_set_period(struct perf_event *event)
 	return x86_perf_event_set_period(event);
 }
 
-static u64 intel_pmu_update(struct perf_event *event)
+static u64 intel_pmu_update(struct perf_event *event, u64 *val)
 {
 	if (unlikely(is_topdown_count(event)))
-		return static_call(intel_pmu_update_topdown_event)(event);
+		return static_call(intel_pmu_update_topdown_event)(event, val);
 
-	return x86_perf_event_update(event);
+	return x86_perf_event_update(event, val);
 }
 
 static void intel_pmu_reset(void)
@@ -3091,7 +3097,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 	 */
 	if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
 		handled++;
-		static_call(intel_pmu_update_topdown_event)(NULL);
+		static_call(intel_pmu_update_topdown_event)(NULL, NULL);
 	}
 
 	/*
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index 844bc4fc4724..3177be0dedd1 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -1058,7 +1058,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		/* it might be unflagged overflow */
 		overflow = p4_pmu_clear_cccr_ovf(hwc);
 
-		val = x86_perf_event_update(event);
+		val = x86_perf_event_update(event, NULL);
 		if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
 			continue;
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8e3f2644a1a3..f6b57f0b2787 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -782,7 +782,7 @@ struct x86_pmu {
 	void		(*del)(struct perf_event *);
 	void		(*read)(struct perf_event *event);
 	int		(*set_period)(struct perf_event *event);
-	u64		(*update)(struct perf_event *event);
+	u64		(*update)(struct perf_event *event, u64 *val);
 	int		(*hw_config)(struct perf_event *event);
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
@@ -1131,7 +1131,7 @@ extern u64 __read_mostly hw_cache_extra_regs
 			[PERF_COUNT_HW_CACHE_OP_MAX]
 			[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
-u64 x86_perf_event_update(struct perf_event *event);
+u64 x86_perf_event_update(struct perf_event *event, u64 *cntr);
 
 static inline unsigned int x86_pmu_config_addr(int index)
 {
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
index 2fd9b0cf9a5e..5fe3a9eed650 100644
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -391,7 +391,7 @@ static int zhaoxin_pmu_handle_irq(struct pt_regs *regs)
 		if (!test_bit(bit, cpuc->active_mask))
 			continue;
 
-		x86_perf_event_update(event);
+		x86_perf_event_update(event, NULL);
 		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (!x86_perf_event_set_period(event))
-- 
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
 namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
 alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V3 10/13] perf: Extend perf_output_read
Date: Wed, 26 Jun 2024 07:35:42 -0700
Message-Id: <20240626143545.480761-11-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>

From: Kan Liang

The event may have been updated in the PMU-specific implementation,
e.g., Intel PEBS counters snapshotting. The common code should not
read and overwrite the value.

The PERF_SAMPLE_READ flag in data->sample_flags can be used to detect
whether the PMU-specific value is available. If so, avoid the
pmu->read() in the common code.
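
For illustration (not part of the patch), a minimal standalone sketch of
the decision perf_output_sample() makes below; need_pmu_read() is a
stand-in name, and the PERF_SAMPLE_READ value is taken from the perf
UAPI.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PERF_SAMPLE_READ (1U << 4)	/* from include/uapi/linux/perf_event.h */

/*
 * Mirrors the perf_output_sample() call site below: the common code only
 * calls pmu->read() when the PMU did NOT already publish the values,
 * i.e. when PERF_SAMPLE_READ is absent from data->sample_flags.
 */
static bool need_pmu_read(uint64_t sample_flags)
{
	return !(sample_flags & PERF_SAMPLE_READ);
}

int main(void)
{
	printf("no PEBS value:      read=%d\n", need_pmu_read(0));
	printf("PEBS value present: read=%d\n", need_pmu_read(PERF_SAMPLE_READ));
	return 0;
}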
Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
---
 kernel/events/core.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8f908f077935..733e507948e6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7243,7 +7243,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 
 static void perf_output_read_group(struct perf_output_handle *handle,
 			    struct perf_event *event,
-			    u64 enabled, u64 running)
+			    u64 enabled, u64 running, bool read)
 {
 	struct perf_event *leader = event->group_leader, *sub;
 	u64 read_format = event->attr.read_format;
@@ -7265,7 +7265,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = running;
 
-	if ((leader != event) &&
+	if ((leader != event) && read &&
 	    (leader->state == PERF_EVENT_STATE_ACTIVE))
 		leader->pmu->read(leader);
 
@@ -7280,7 +7280,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	for_each_sibling_event(sub, leader) {
 		n = 0;
 
-		if ((sub != event) &&
+		if ((sub != event) && read &&
 		    (sub->state == PERF_EVENT_STATE_ACTIVE))
 			sub->pmu->read(sub);
 
@@ -7307,7 +7307,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 * on another CPU, from interrupt/NMI context.
 */
static void perf_output_read(struct perf_output_handle *handle,
-			     struct perf_event *event)
+			     struct perf_event *event,
+			     bool read)
 {
 	u64 enabled = 0, running = 0, now;
 	u64 read_format = event->attr.read_format;
@@ -7325,7 +7326,7 @@ static void perf_output_read(struct perf_output_handle *handle,
 		calc_timer_values(event, &now, &enabled, &running);
 
 	if (event->attr.read_format & PERF_FORMAT_GROUP)
-		perf_output_read_group(handle, event, enabled, running);
+		perf_output_read_group(handle, event, enabled, running, read);
 	else
 		perf_output_read_one(handle, event, enabled, running);
 }
@@ -7367,7 +7368,7 @@ void perf_output_sample(struct perf_output_handle *handle,
 		perf_output_put(handle, data->period);
 
 	if (sample_type & PERF_SAMPLE_READ)
-		perf_output_read(handle, event);
+		perf_output_read(handle, event, !(data->sample_flags & PERF_SAMPLE_READ));
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
@@ -7968,7 +7969,7 @@ perf_event_read_event(struct perf_event *event,
 		return;
 
 	perf_output_put(&handle, read_event);
-	perf_output_read(&handle, event);
+	perf_output_read(&handle, event, true);
 	perf_event__output_id_sample(event, &handle, &sample);
 
 	perf_output_end(&handle);
-- 
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
 namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
 alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V3 11/13] perf/x86/intel: Move PEBS event update after the sample output
Date: Wed, 26 Jun 2024 07:35:43 -0700
Message-Id: <20240626143545.480761-12-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>

From: Kan Liang

In drain_pebs(), besides outputting the sample data, perf needs to
update the PEBS event (e.g., prev_count, event->count, etc.) as well.
Both operations may invoke perf_event_update(), but the sequence of the
two operations doesn't matter for now, because the updated event value
is read directly from the counter via rdpmc and the counter stops in
drain_pebs(). But if the updated event value comes from different
places (PEBS record vs. counter), the sequence does matter.
For example, with the new Intel PEBS counters snapshotting feature, the
large PEBS can be enabled for the sample read, since counter values for
each sample are recorded in the PEBS records. The current perf does the
PEBS event update first, which updates the event for all the records
altogether. The later sample read output then cannot dump the value for
each sample, since prev_count is already the newest value from the
current counter.

Move the PEBS event update after the sample output. Each sample read
output will then update and output the value only for this sample
(according to the value in the PEBS record). Once all samples are
output, update the PEBS event again according to the current counter,
and set the left period.

The !intel_pmu_save_and_restart() case only happens when
!hwc->event_base or left > 0. The !hwc->event_base is impossible for a
PEBS event, which is only available on GP and fixed counters, and
__intel_pmu_pebs_event() only processes the overflowed sample, so left
should always be <= 0. It's safe to ignore the return value of the
intel_pmu_save_and_restart() check.
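
A toy model (not part of the patch) of why the ordering matters: the
per-record deltas can only be produced while prev_count still trails the
records; synchronizing against the live counter first would destroy
them. The record values are made up for the illustration.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t records[3] = { 100, 205, 315 };	/* per-record snapshots */
	uint64_t prev_count = 0;

	/* Sample output first: one delta per PEBS record. */
	for (int i = 0; i < 3; i++) {
		printf("record %d: delta %llu\n", i,
		       (unsigned long long)(records[i] - prev_count));
		prev_count = records[i];
	}

	/* Only now sync against the live counter (the save-and-restart).
	 * Doing this step first would have set prev_count to 320 and made
	 * the per-record deltas above unrecoverable. */
	uint64_t live_counter = 320;
	printf("final delta: %llu\n",
	       (unsigned long long)(live_counter - prev_count));
	return 0;
}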
Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
---
 arch/x86/events/intel/ds.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ce7e98409f29..fb04ef307f7b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2158,17 +2158,6 @@ __intel_pmu_pebs_event(struct perf_event *event,
 	void *at = get_next_pebs_record_by_bit(base, top, bit);
 	static struct pt_regs dummy_iregs;
 
-	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
-		/*
-		 * Now, auto-reload is only enabled in fixed period mode.
-		 * The reload value is always hwc->sample_period.
-		 * May need to change it, if auto-reload is enabled in
-		 * freq mode later.
-		 */
-		intel_pmu_save_and_restart_reload(event, count);
-	} else if (!intel_pmu_save_and_restart(event))
-		return;
-
 	if (!iregs)
 		iregs = &dummy_iregs;
 
@@ -2197,6 +2186,17 @@ __intel_pmu_pebs_event(struct perf_event *event,
 		if (perf_event_overflow(event, data, regs))
 			x86_pmu_stop(event, 0);
 	}
+
+	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+		/*
+		 * Now, auto-reload is only enabled in fixed period mode.
+		 * The reload value is always hwc->sample_period.
+		 * May need to change it, if auto-reload is enabled in
+		 * freq mode later.
+		 */
+		intel_pmu_save_and_restart_reload(event, count);
+	} else
+		intel_pmu_save_and_restart(event);
 }
 
 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
-- 
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
 namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
 alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V3 12/13] perf/x86/intel: Support PEBS counters snapshotting
Date: Wed, 26 Jun 2024 07:35:44 -0700
Message-Id: <20240626143545.480761-13-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>

From: Kan Liang

The counters snapshotting is a new adaptive PEBS extension, which can
capture programmable counters, fixed-function counters, and performance
metrics in a PEBS record. The feature is available in PEBS format V6.

The target counters can be configured in the new fields of MSR_PEBS_CFG.
The PEBS HW will then generate the bit mask of counters (Counters Group
Header) followed by the content of all the requested counters in a PEBS
record.

The current Linux perf sample read feature intends to read the counters
of the other member events when the leader event overflows. But the
current read happens in the NMI handler, which may have a small gap from
the overflow. Use the counters snapshotting feature for the sample read.

Add a new PEBS_CNTR flag to indicate a sample read group that utilizes
the counters snapshotting feature. When the group is scheduled, the PEBS
configuration can be updated accordingly.
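
For illustration (not part of the patch), a simplified standalone mirror
of how the new MSR_PEBS_CFG counter-request bits are composed. It omits
the mask clamping done by __intel_pmu_pebs_update_cfg() in the ds.c hunk
below; the field layout constants are copied from the asm/perf_event.h
hunk, and INTEL_PMC_IDX_FIXED is 32 as in the kernel headers.

#include <stdint.h>
#include <stdio.h>

/* Field layout from the asm/perf_event.h hunk below. */
#define PEBS_DATACFG_CNTR	(1ULL << 4)
#define PEBS_DATACFG_CNTR_SHIFT	32
#define PEBS_DATACFG_FIX_SHIFT	48
#define INTEL_PMC_IDX_FIXED	32

/* GP counter idx sets one bit in [47:32]; a fixed counter sets one
 * bit in [55:48]. The real code additionally masks against
 * PEBS_DATACFG_CNTR_MASK / PEBS_DATACFG_FIX_MASK. */
static void pebs_cfg_add_counter(uint64_t *cfg, int idx)
{
	*cfg |= PEBS_DATACFG_CNTR;
	if (idx >= INTEL_PMC_IDX_FIXED)
		*cfg |= 1ULL << (PEBS_DATACFG_FIX_SHIFT + idx - INTEL_PMC_IDX_FIXED);
	else
		*cfg |= 1ULL << (PEBS_DATACFG_CNTR_SHIFT + idx);
}

int main(void)
{
	uint64_t cfg = 0;

	pebs_cfg_add_counter(&cfg, 0);	/* GP counter 0 */
	pebs_cfg_add_counter(&cfg, 33);	/* fixed counter 1 */
	printf("pebs_data_cfg = 0x%llx\n", (unsigned long long)cfg);
	return 0;
}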
Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
---
 arch/x86/events/intel/core.c       |  33 ++++++++-
 arch/x86/events/intel/ds.c         | 114 +++++++++++++++++++++++++++--
 arch/x86/events/perf_event.h       |   3 +
 arch/x86/events/perf_event_flags.h |   2 +-
 arch/x86/include/asm/perf_event.h  |  15 ++++
 5 files changed, 157 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 5cd9be066326..8f55503401ba 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4058,6 +4058,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
 	}
 
+	if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
+	    (x86_pmu.intel_cap.pebs_format >= 6)) {
+		struct perf_event *leader = event->group_leader;
+
+		if (is_slots_event(leader))
+			leader = list_next_entry(leader, sibling_list);
+
+		if (leader->attr.precise_ip) {
+			leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+			event->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+		}
+	}
+
 	if ((event->attr.type == PERF_TYPE_HARDWARE) ||
 	    (event->attr.type == PERF_TYPE_HW_CACHE))
 		return 0;
@@ -4161,6 +4174,24 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	return 0;
 }
 
+static int intel_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+	struct perf_event *event;
+	int ret = x86_schedule_events(cpuc, n, assign);
+
+	if (ret)
+		return ret;
+
+	if (cpuc->is_fake)
+		return ret;
+
+	event = cpuc->event_list[n - 1];
+	if (event && (event->hw.flags & PERF_X86_EVENT_PEBS_CNTR))
+		intel_pmu_pebs_update_cfg(cpuc, n, assign);
+
+	return 0;
+}
+
 /*
 * Currently, the only caller of this function is the atomic_switch_perf_msrs().
 * The host perf context helps to prepare the values of the real hardware for
@@ -5245,7 +5276,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.set_period		= intel_pmu_set_period,
 	.update			= intel_pmu_update,
 	.hw_config		= intel_pmu_hw_config,
-	.schedule_events	= x86_schedule_events,
+	.schedule_events	= intel_pmu_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.fixedctr		= MSR_ARCH_PERFMON_FIXED_CTR0,
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index fb04ef307f7b..3cf547590df2 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1281,10 +1281,61 @@ static void adaptive_pebs_record_size_update(void)
 		sz += sizeof(struct pebs_xmm);
 	if (pebs_data_cfg & PEBS_DATACFG_LBRS)
 		sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
+	if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) {
+		sz += sizeof(struct pebs_cntr_header);
+
+		/* Metrics base and Metrics Data */
+		if (pebs_data_cfg & PEBS_DATACFG_METRICS)
+			sz += 2 * sizeof(u64);
+
+		if (pebs_data_cfg & PEBS_DATACFG_CNTR) {
+			sz += hweight64((pebs_data_cfg >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK)
+			      * sizeof(u64);
+			sz += hweight64((pebs_data_cfg >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK)
+			      * sizeof(u64);
+		}
+	}
 
 	cpuc->pebs_record_size = sz;
 }
 
+static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
+					int idx, u64 *pebs_data_cfg)
+{
+	if (is_metric_event(event)) {
+		*pebs_data_cfg |= PEBS_DATACFG_METRICS;
+		return;
+	}
+
+	*pebs_data_cfg |= PEBS_DATACFG_CNTR;
+
+	if (idx >= INTEL_PMC_IDX_FIXED) {
+		*pebs_data_cfg |= ((1ULL << (idx - INTEL_PMC_IDX_FIXED)) & PEBS_DATACFG_FIX_MASK)
+				  << PEBS_DATACFG_FIX_SHIFT;
+	} else {
+		*pebs_data_cfg |= ((1ULL << idx) & PEBS_DATACFG_CNTR_MASK)
+				  << PEBS_DATACFG_CNTR_SHIFT;
+	}
+}
+
+void intel_pmu_pebs_update_cfg(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+	struct perf_event *leader, *event;
+	u64 pebs_data_cfg = 0;
+	int i = n - 1;
+
+	leader = cpuc->event_list[i]->group_leader;
+	for (; i >= 0; i--) {
+		event = cpuc->event_list[i];
+		if (leader != event->group_leader)
+			break;
+		__intel_pmu_pebs_update_cfg(event, assign[i], &pebs_data_cfg);
+	}
+
+	if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
+		cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
+}
+
 #define PERF_PEBS_MEMINFO_TYPE	(PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
 				 PERF_SAMPLE_PHYS_ADDR |		   \
 				 PERF_SAMPLE_WEIGHT_TYPE |		   \
@@ -2024,6 +2075,40 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 		}
 	}
 
+	if (format_size & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) {
+		struct pebs_cntr_header *cntr = next_record;
+		int bit;
+
+		next_record += sizeof(struct pebs_cntr_header);
+
+		for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) {
+			x86_perf_event_update(cpuc->events[bit], (u64 *)next_record);
+			next_record += sizeof(u64);
+		}
+
+		for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) {
+			/* The slots event will be handled with perf_metric later */
+			if ((cntr->metrics == INTEL_CNTR_METRICS) &&
+			    (INTEL_PMC_IDX_FIXED_SLOTS == bit + INTEL_PMC_IDX_FIXED)) {
+				next_record += sizeof(u64);
+				continue;
+			}
+			x86_perf_event_update(cpuc->events[bit + INTEL_PMC_IDX_FIXED], (u64 *)next_record);
+			next_record += sizeof(u64);
+		}
+
+		/* HW will reload the value right after the overflow. */
+		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+			local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period);
+
+		if (cntr->metrics == INTEL_CNTR_METRICS) {
+			static_call(intel_pmu_update_topdown_event)
+					(event->group_leader, (u64 *)next_record);
+			next_record += 2 * sizeof(u64);
+		}
+		data->sample_flags |= PERF_SAMPLE_READ;
+	}
+
 	WARN_ONCE(next_record != __pebs + (format_size >> 48),
 		  "PEBS record size %llu, expected %llu, config %llx\n",
 		  format_size >> 48,
@@ -2188,13 +2273,22 @@ __intel_pmu_pebs_event(struct perf_event *event,
 	}
 
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
-		/*
-		 * Now, auto-reload is only enabled in fixed period mode.
-		 * The reload value is always hwc->sample_period.
-		 * May need to change it, if auto-reload is enabled in
-		 * freq mode later.
-		 */
-		intel_pmu_save_and_restart_reload(event, count);
+		if (event->hw.flags & PERF_X86_EVENT_PEBS_CNTR) {
+			/*
+			 * The value of each sample has been updated when setup
+			 * the corresponding sample data. But there may be a small
+			 * gap between the last overflow and the drain_pebs().
+			 */
+			intel_pmu_save_and_restart_reload(event, 0);
+		} else {
+			/*
+			 * Now, auto-reload is only enabled in fixed period mode.
+			 * The reload value is always hwc->sample_period.
+			 * May need to change it, if auto-reload is enabled in
+			 * freq mode later.
+			 */
+			intel_pmu_save_and_restart_reload(event, count);
+		}
 	} else
 		intel_pmu_save_and_restart(event);
 }
@@ -2486,6 +2580,10 @@ void __init intel_ds_init(void)
 			x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
 			break;
 
+		case 6:
+			if (x86_pmu.intel_cap.pebs_baseline)
+				x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
+			fallthrough;
 		case 5:
 			x86_pmu.pebs_ept = 1;
 			fallthrough;
@@ -2510,7 +2608,7 @@ void __init intel_ds_init(void)
 					  PERF_SAMPLE_REGS_USER |
 					  PERF_SAMPLE_REGS_INTR);
 		}
-		pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
+		pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual);
 
 		if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
 			pr_cont("PEBS-via-PT, ");
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f6b57f0b2787..3d64ed240e91 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1132,6 +1132,7 @@ extern u64 __read_mostly hw_cache_extra_regs
 			[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
 u64 x86_perf_event_update(struct perf_event *event, u64 *cntr);
+DECLARE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
 
 static inline unsigned int x86_pmu_config_addr(int index)
 {
@@ -1626,6 +1627,8 @@ void intel_pmu_pebs_disable_all(void);
 
 void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
 
+void intel_pmu_pebs_update_cfg(struct cpu_hw_events *cpuc, int n, int *assign);
+
 void intel_pmu_auto_reload_read(struct perf_event *event);
 
 void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 6c977c19f2cd..1d9e385649b5 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -9,7 +9,7 @@ PERF_ARCH(PEBS_LD_HSW,	0x00008) /* haswell style datala, load */
 PERF_ARCH(PEBS_NA_HSW,	0x00010) /* haswell style datala, unknown */
 PERF_ARCH(EXCL,		0x00020) /* HT exclusivity on counter */
 PERF_ARCH(DYNAMIC,	0x00040) /* dynamic alloc'd constraint */
-			/* 0x00080 */
+PERF_ARCH(PEBS_CNTR,	0x00080) /* PEBS counters snapshot */
 PERF_ARCH(EXCL_ACCT,	0x00100) /* accounted EXCL event */
 PERF_ARCH(AUTO_RELOAD,	0x00200) /* use PEBS auto-reload */
 PERF_ARCH(LARGE_PEBS,	0x00400) /* use large PEBS */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 91b73571412f..709746cd7c19 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -140,6 +140,12 @@
 #define PEBS_DATACFG_XMMS	BIT_ULL(2)
 #define PEBS_DATACFG_LBRS	BIT_ULL(3)
 #define PEBS_DATACFG_LBR_SHIFT	24
+#define PEBS_DATACFG_CNTR	BIT_ULL(4)
+#define PEBS_DATACFG_CNTR_SHIFT	32
+#define PEBS_DATACFG_CNTR_MASK	GENMASK_ULL(15, 0)
+#define PEBS_DATACFG_FIX_SHIFT	48
+#define PEBS_DATACFG_FIX_MASK	GENMASK_ULL(7, 0)
+#define PEBS_DATACFG_METRICS	BIT_ULL(5)
 
 /* Steal the highest bit of pebs_data_cfg for SW usage */
 #define PEBS_UPDATE_DS_SW	BIT_ULL(63)
@@ -444,6 +450,15 @@ struct pebs_xmm {
 	u64 xmm[16*2];	/* two entries for each register */
 };
 
+struct pebs_cntr_header {
+	u32 cntr;
+	u32 fixed;
+	u32 metrics;
+	u32 reserved;
+};
+
+#define INTEL_CNTR_METRICS	0x3
+
 /*
 * AMD Extended Performance Monitoring and Debug cpuid feature detection
 */
-- 
2.38.1

From nobody Wed Dec 17 22:45:45 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
 namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
 alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com, Kan Liang
Subject: [PATCH V3 13/13] perf/x86/intel: Support RDPMC metrics clear mode
Date: Wed, 26 Jun 2024 07:35:45 -0700
Message-Id: <20240626143545.480761-14-kan.liang@linux.intel.com>
In-Reply-To: <20240626143545.480761-1-kan.liang@linux.intel.com>
References: <20240626143545.480761-1-kan.liang@linux.intel.com>

From: Kan Liang

The new RDPMC enhancement, metrics clear mode, clears the
PERF_METRICS-related resources as well as the fixed-function
performance monitoring counter 3 after a read is performed. It is
available for ring 3. The feature is enumerated by
IA32_PERF_CAPABILITIES.RDPMC_CLEAR_METRICS[bit 19]. To enable the
feature, the IA32_FIXED_CTR_CTRL.METRICS_CLEAR_EN[bit 14] must be set.

Two ways were considered to enable the feature:
- Expose a knob in sysfs globally. One user may affect the measurement
  of other users when changing the knob. This solution was dropped.
- Introduce a new event format, metrics_clear, for the slots event, to
  disable/enable the feature only for the current process. Users can
  utilize the feature as needed.
The latter solution is implemented in this patch.

The current KVM doesn't support the perf metrics yet. For
virtualization, the feature can be enabled later separately.

Update the perf metrics documentation.
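
For illustration (not part of the patch), a hypothetical userspace
sketch of requesting the new mode by setting config1 bit 0 on the slots
event; open_slots_metrics_clear() is an invented helper, and the raw
TOPDOWN.SLOTS encoding 0x0400 is the fixed-counter-3 pseudo-encoding,
which should be verified for the target platform.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Open a slots event with metrics clear mode requested via config1:0
 * (the new "metrics_clear" format attribute). Returns the perf fd. */
static int open_slots_metrics_clear(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x0400;	/* TOPDOWN.SLOTS pseudo-encoding (verify) */
	attr.config1 = 1;	/* INTEL_TD_CFG_METRIC_CLEAR */

	return (int)syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
	return open_slots_metrics_clear() < 0 ? 1 : 0;
}

With the perf tool, the same request would presumably be expressed as
cpu/slots,metrics_clear=1/, matching the format attribute added below.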
Suggested-by: Andi Kleen
Reviewed-by: Andi Kleen
Reviewed-by: Ian Rogers
Signed-off-by: Kan Liang
---
 arch/x86/events/intel/core.c         | 20 +++++++++++++++++++-
 arch/x86/events/perf_event.h         |  1 +
 arch/x86/include/asm/perf_event.h    |  4 ++++
 tools/perf/Documentation/topdown.txt |  9 +++++++--
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 8f55503401ba..0d985295433c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2822,6 +2822,9 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 			return;
 
 		idx = INTEL_PMC_IDX_FIXED_SLOTS;
+
+		if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR)
+			bits |= INTEL_FIXED_3_METRICS_CLEAR;
 	}
 
 	intel_set_masks(event, idx);
@@ -4086,7 +4089,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	 * is used in a metrics group, it too cannot support sampling.
 	 */
 	if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
-		if (event->attr.config1 || event->attr.config2)
+		/* The metrics_clear can only be set for the slots event */
+		if (event->attr.config1 &&
+		    (!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR)))
+			return -EINVAL;
+
+		if (event->attr.config2)
 			return -EINVAL;
 
 		/*
@@ -4673,6 +4681,8 @@ PMU_FORMAT_ATTR(in_tx, "config:32");
 PMU_FORMAT_ATTR(in_tx_cp, "config:33");
 PMU_FORMAT_ATTR(eq, "config:36"); /* v6 + */
 
+PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+
 static ssize_t umask2_show(struct device *dev,
 			   struct device_attribute *attr,
 			   char *page)
@@ -4692,6 +4702,7 @@ static struct device_attribute format_attr_umask2 =
 static struct attribute *format_evtsel_ext_attrs[] = {
 	&format_attr_umask2.attr,
 	&format_attr_eq.attr,
+	&format_attr_metrics_clear.attr,
 	NULL
 };
 
@@ -4716,6 +4727,13 @@ evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 	if (i == 1)
 		return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;
 
+	/* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+	if (i == 2) {
+		union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap);
+
+		return intel_cap.rdpmc_metrics_clear ? attr->mode : 0;
+	}
+
 	return 0;
 }
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 3d64ed240e91..9d1d5adec0ad 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -624,6 +624,7 @@ union perf_capabilities {
 		u64	pebs_output_pt_available:1;
 		u64	pebs_timing_info:1;
 		u64	anythread_deprecated:1;
+		u64	rdpmc_metrics_clear:1;
 	};
 	u64	capabilities;
 };
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 709746cd7c19..21e1d1fe5972 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -41,6 +41,7 @@
 #define INTEL_FIXED_0_USER		(1ULL << 1)
 #define INTEL_FIXED_0_ANYTHREAD		(1ULL << 2)
 #define INTEL_FIXED_0_ENABLE_PMI	(1ULL << 3)
+#define INTEL_FIXED_3_METRICS_CLEAR	(1ULL << 2)
 
 #define HSW_IN_TX			(1ULL << 32)
 #define HSW_IN_TX_CHECKPOINTED		(1ULL << 33)
@@ -378,6 +379,9 @@ static inline bool use_fixed_pseudo_encoding(u64 code)
 #define INTEL_TD_METRIC_MAX		INTEL_TD_METRIC_MEM_BOUND
 #define INTEL_TD_METRIC_NUM		8
 
+#define INTEL_TD_CFG_METRIC_CLEAR_BIT	0
+#define INTEL_TD_CFG_METRIC_CLEAR	BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT)
+
 static inline bool is_metric_idx(int idx)
 {
 	return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt
index ae0aee86844f..f36c8ca1dc53 100644
--- a/tools/perf/Documentation/topdown.txt
+++ b/tools/perf/Documentation/topdown.txt
@@ -280,8 +280,13 @@ with no longer interval than a few seconds
 
 	perf stat -I 1000 --topdown ...
 
-For user programs using RDPMC directly the counter can
-be reset explicitly using ioctl:
+Starting from the Lunar Lake p-core, a RDPMC metrics clear mode is
+introduced. The metrics and the fixed counter 3 are automatically
+cleared after the read is performed. It is recommended to always enable
+the mode. To enable the mode, the config1 of slots event is set to 1.
+
+On the previous platforms, for user programs using RDPMC directly, the
+counter has to be reset explicitly using ioctl:
 
 	ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0);
 
-- 
2.38.1