From nobody Wed Dec 17 08:54:20 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	ak@linux.intel.com, linux-kernel@vger.kernel.org
Cc: eranian@google.com, thomas.falcon@intel.com, Kan Liang
Subject: [PATCH V4 1/5] perf/x86: Add dynamic constraint
Date: Thu, 27 Mar 2025 12:52:13 -0700
Message-Id: <20250327195217.2683619-2-kan.liang@linux.intel.com>
In-Reply-To: <20250327195217.2683619-1-kan.liang@linux.intel.com>
References: <20250327195217.2683619-1-kan.liang@linux.intel.com>
From: Kan Liang

More and more features require a dynamic event constraint, e.g., branch
counter logging, auto counter reload, Arch PEBS, etc.

Add a generic flag, PMU_FL_DYN_CONSTRAINT, to indicate such cases. It
avoids having to add an individual flag check in intel_cpuc_prepare()
for every new feature.

Add a dyn_constraint field in struct hw_perf_event to track the dynamic
constraint of the event, and apply it whenever it has been updated.

Apply the generic dynamic constraint to branch counter logging. Many
features on and after V6 require a dynamic constraint, so
unconditionally set the flag for V6+.

Tested-by: Thomas Falcon
Signed-off-by: Kan Liang
---
 arch/x86/events/core.c       |  1 +
 arch/x86/events/intel/core.c | 21 +++++++++++++++------
 arch/x86/events/intel/lbr.c  |  2 +-
 arch/x86/events/perf_event.h |  1 +
 include/linux/perf_event.h   |  1 +
 5 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 833478ffbbf5..e30a7d8a3929 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -674,6 +674,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	event->hw.idx = -1;
 	event->hw.last_cpu = -1;
 	event->hw.last_tag = ~0ULL;
+	event->hw.dyn_constraint = ~0ULL;

 	/* mark unused */
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index dc38dec244c1..2a3f802e3ab9 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3730,10 +3730,9 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	if (cpuc->excl_cntrs)
 		return intel_get_excl_constraints(cpuc, event, idx, c2);

-	/* Not all counters support the branch counter feature. */
-	if (branch_sample_counters(event)) {
+	if (event->hw.dyn_constraint != ~0ULL) {
 		c2 = dyn_constraint(cpuc, c2, idx);
-		c2->idxmsk64 &= x86_pmu.lbr_counters;
+		c2->idxmsk64 &= event->hw.dyn_constraint;
 		c2->weight = hweight64(c2->idxmsk64);
 	}

@@ -4135,15 +4134,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		leader = event->group_leader;
 		if (branch_sample_call_stack(leader))
 			return -EINVAL;
-		if (branch_sample_counters(leader))
+		if (branch_sample_counters(leader)) {
 			num++;
+			leader->hw.dyn_constraint &= x86_pmu.lbr_counters;
+		}
 		leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;

 		for_each_sibling_event(sibling, leader) {
 			if (branch_sample_call_stack(sibling))
 				return -EINVAL;
-			if (branch_sample_counters(sibling))
+			if (branch_sample_counters(sibling)) {
 				num++;
+				sibling->hw.dyn_constraint &= x86_pmu.lbr_counters;
+			}
 		}

 		if (num > fls(x86_pmu.lbr_counters))
@@ -4943,7 +4946,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
 			goto err;
 	}

-	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
+	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_DYN_CONSTRAINT)) {
 		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);

 		cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -6657,6 +6660,12 @@ __init int intel_pmu_init(void)
 		pr_cont(" AnyThread deprecated, ");
 	}

+	/*
+	 * Many features on and after V6 require dynamic constraint,
+	 * e.g., Arch PEBS, ACR.
+	 */
+	if (version >= 6)
+		x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
 	/*
 	 * Install the hw-cache-events table:
 	 */
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index f44c3d866f24..05acd6449ceb 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1618,7 +1618,7 @@ void __init intel_pmu_arch_lbr_init(void)
 	x86_pmu.lbr_nr = lbr_nr;

 	if (!!x86_pmu.lbr_counters)
-		x86_pmu.flags |= PMU_FL_BR_CNTR;
+		x86_pmu.flags |= PMU_FL_BR_CNTR | PMU_FL_DYN_CONSTRAINT;

 	if (x86_pmu.lbr_mispred)
 		static_branch_enable(&x86_lbr_mispred);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8e5a4c3c5b95..d6d56568e11f 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1059,6 +1059,7 @@ do {									\
 #define PMU_FL_MEM_LOADS_AUX	0x100 /* Require an auxiliary event for the complete memory info */
 #define PMU_FL_RETIRE_LATENCY	0x200 /* Support Retire Latency in PEBS */
 #define PMU_FL_BR_CNTR		0x400 /* Support branch counter logging */
+#define PMU_FL_DYN_CONSTRAINT	0x800 /* Needs dynamic constraint */

 #define EVENT_VAR(_id) event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 63dddb3b54f0..2fa0cd6772f1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -158,6 +158,7 @@ struct hw_perf_event {
 		struct { /* hardware */
 			u64		config;
 			u64		last_tag;
+			u64		dyn_constraint;
 			unsigned long	config_base;
 			unsigned long	event_base;
 			int		event_base_rdpmc;
--
2.38.1

From nobody Wed Dec 17 08:54:20 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	ak@linux.intel.com, linux-kernel@vger.kernel.org
Cc: eranian@google.com, thomas.falcon@intel.com, Kan Liang
Subject: [PATCH V4 2/5] perf/x86/intel: Track the number of events that need late setup
Date: Thu, 27 Mar 2025 12:52:14 -0700
Message-Id: <20250327195217.2683619-3-kan.liang@linux.intel.com>
In-Reply-To: <20250327195217.2683619-1-kan.liang@linux.intel.com>
References: <20250327195217.2683619-1-kan.liang@linux.intel.com>
From: Kan Liang

When a machine supports PEBS v6, perf unconditionally searches
cpuc->event_list[] for every event and checks whether the late setup is
required, which is unnecessary. The late setup is only required for
special events, e.g., events that support the counter snapshotting
feature.

Add n_late_setup to track the number of events that need the late
setup. Other features, e.g., the auto counter reload feature, require
the late setup as well.

Add a wrapper, intel_pmu_pebs_late_setup(), for the events that support
the counter snapshotting feature.
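(Not part of the patch: a minimal sketch, with hypothetical names, of the
bookkeeping pattern described above. It only illustrates why a per-CPU
count lets the hot path skip the event-list scan when no event needs the
late setup.)

	struct cpu_events {
		int n_late_setup;	/* events that still need late setup */
		/* ... event list, counter assignments, ... */
	};

	/* pmu::add() path: one more event wants late setup */
	static void late_setup_add(struct cpu_events *c) { c->n_late_setup++; }

	/* pmu::del() path: that event is gone again */
	static void late_setup_del(struct cpu_events *c) { c->n_late_setup--; }

	static void late_setup(struct cpu_events *c)
	{
		if (!c->n_late_setup)
			return;		/* common case: nothing to scan */
		/* otherwise walk the event list and finish the setup */
	}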
Tested-by: Thomas Falcon
Signed-off-by: Kan Liang
---
 arch/x86/events/intel/core.c | 14 ++++++++++++++
 arch/x86/events/intel/ds.c   |  3 +--
 arch/x86/events/perf_event.h |  5 +++++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2a3f802e3ab9..66c42f856636 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2603,6 +2603,8 @@ static void intel_pmu_del_event(struct perf_event *event)
 		intel_pmu_lbr_del(event);
 	if (event->attr.precise_ip)
 		intel_pmu_pebs_del(event);
+	if (is_pebs_counter_event_group(event))
+		this_cpu_ptr(&cpu_hw_events)->n_late_setup--;
 }

 static int icl_set_topdown_event_period(struct perf_event *event)
@@ -2914,12 +2916,24 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	}
 }

+void intel_pmu_late_setup(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!cpuc->n_late_setup)
+		return;
+
+	intel_pmu_pebs_late_setup(cpuc);
+}
+
 static void intel_pmu_add_event(struct perf_event *event)
 {
 	if (event->attr.precise_ip)
 		intel_pmu_pebs_add(event);
 	if (intel_pmu_needs_branch_stack(event))
 		intel_pmu_lbr_add(event);
+	if (is_pebs_counter_event_group(event))
+		this_cpu_ptr(&cpu_hw_events)->n_late_setup++;
 }

 /*
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 1f7e1a692a7a..486881fe162e 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1355,9 +1355,8 @@ static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
 }


-static void intel_pmu_late_setup(void)
+void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct perf_event *event;
 	u64 pebs_data_cfg = 0;
 	int i;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index d6d56568e11f..84943243b05d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -261,6 +261,7 @@ struct cpu_hw_events {
 	struct event_constraint	*event_constraint[X86_PMC_IDX_MAX];

 	int			n_excl; /* the number of exclusive events */
+	int			n_late_setup; /* the num of events needs late setup */

 	unsigned int		txn_flags;
 	int			is_fake;
@@ -1598,6 +1599,8 @@ void intel_pmu_disable_bts(void);

 int intel_pmu_drain_bts_buffer(void);

+void intel_pmu_late_setup(void);
+
 u64 grt_latency_data(struct perf_event *event, u64 status);

 u64 cmt_latency_data(struct perf_event *event, u64 status);
@@ -1654,6 +1657,8 @@ void intel_pmu_pebs_disable_all(void);

 void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);

+void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc);
+
 void intel_pmu_drain_pebs_buffer(void);

 void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
--
2.38.1

From nobody Wed Dec 17 08:54:20 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	ak@linux.intel.com, linux-kernel@vger.kernel.org
Cc: eranian@google.com, thomas.falcon@intel.com, Kan Liang
Subject: [PATCH V4 3/5] perf: Extend the bit width of the arch-specific flag
Date: Thu, 27 Mar 2025 12:52:15 -0700
Message-Id: <20250327195217.2683619-4-kan.liang@linux.intel.com>
In-Reply-To: <20250327195217.2683619-1-kan.liang@linux.intel.com>
References: <20250327195217.2683619-1-kan.liang@linux.intel.com>

From: Kan Liang

The auto counter reload feature requires an event flag to indicate an
auto counter reload group, which can only be scheduled on the specific
counters enumerated in CPUID. However, hw_perf_event.flags has run out
of bits on X86.

Two solutions were considered to address the issue.

- Currently, 20 bits are reserved for the architecture-specific flags,
  and only bit 31 is used for the generic flag, so there is still
  plenty of space left.
  Reserve 8 more bits for the arch-specific flags.

- Add a new X86-specific hw_perf_event.flags1 to support more flags.

The former is implemented, since enough room is still left for the
generic flags.

Tested-by: Thomas Falcon
Signed-off-by: Kan Liang
---
 arch/x86/events/perf_event_flags.h | 41 +++++++++++++++---------------
 include/linux/perf_event.h         |  2 +-
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 1d9e385649b5..70078334e4a3 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -2,23 +2,24 @@
 /*
  * struct hw_perf_event.flags flags
  */
-PERF_ARCH(PEBS_LDLAT,		0x00001) /* ld+ldlat data address sampling */
-PERF_ARCH(PEBS_ST,		0x00002) /* st data address sampling */
-PERF_ARCH(PEBS_ST_HSW,		0x00004) /* haswell style datala, store */
-PERF_ARCH(PEBS_LD_HSW,		0x00008) /* haswell style datala, load */
-PERF_ARCH(PEBS_NA_HSW,		0x00010) /* haswell style datala, unknown */
-PERF_ARCH(EXCL,			0x00020) /* HT exclusivity on counter */
-PERF_ARCH(DYNAMIC,		0x00040) /* dynamic alloc'd constraint */
-PERF_ARCH(PEBS_CNTR,		0x00080) /* PEBS counters snapshot */
-PERF_ARCH(EXCL_ACCT,		0x00100) /* accounted EXCL event */
-PERF_ARCH(AUTO_RELOAD,		0x00200) /* use PEBS auto-reload */
-PERF_ARCH(LARGE_PEBS,		0x00400) /* use large PEBS */
-PERF_ARCH(PEBS_VIA_PT,		0x00800) /* use PT buffer for PEBS */
-PERF_ARCH(PAIR,			0x01000) /* Large Increment per Cycle */
-PERF_ARCH(LBR_SELECT,		0x02000) /* Save/Restore MSR_LBR_SELECT */
-PERF_ARCH(TOPDOWN,		0x04000) /* Count Topdown slots/metrics events */
-PERF_ARCH(PEBS_STLAT,		0x08000) /* st+stlat data address sampling */
-PERF_ARCH(AMD_BRS,		0x10000) /* AMD Branch Sampling */
-PERF_ARCH(PEBS_LAT_HYBRID,	0x20000) /* ld and st lat for hybrid */
-PERF_ARCH(NEEDS_BRANCH_STACK,	0x40000) /* require branch stack setup */
-PERF_ARCH(BRANCH_COUNTERS,	0x80000) /* logs the counters in the extra space of each branch */
+PERF_ARCH(PEBS_LDLAT,		0x0000001) /* ld+ldlat data address sampling */
+PERF_ARCH(PEBS_ST,		0x0000002) /* st data address sampling */
+PERF_ARCH(PEBS_ST_HSW,		0x0000004) /* haswell style datala, store */
+PERF_ARCH(PEBS_LD_HSW,		0x0000008) /* haswell style datala, load */
+PERF_ARCH(PEBS_NA_HSW,		0x0000010) /* haswell style datala, unknown */
+PERF_ARCH(EXCL,			0x0000020) /* HT exclusivity on counter */
+PERF_ARCH(DYNAMIC,		0x0000040) /* dynamic alloc'd constraint */
+PERF_ARCH(PEBS_CNTR,		0x0000080) /* PEBS counters snapshot */
+PERF_ARCH(EXCL_ACCT,		0x0000100) /* accounted EXCL event */
+PERF_ARCH(AUTO_RELOAD,		0x0000200) /* use PEBS auto-reload */
+PERF_ARCH(LARGE_PEBS,		0x0000400) /* use large PEBS */
+PERF_ARCH(PEBS_VIA_PT,		0x0000800) /* use PT buffer for PEBS */
+PERF_ARCH(PAIR,			0x0001000) /* Large Increment per Cycle */
+PERF_ARCH(LBR_SELECT,		0x0002000) /* Save/Restore MSR_LBR_SELECT */
+PERF_ARCH(TOPDOWN,		0x0004000) /* Count Topdown slots/metrics events */
+PERF_ARCH(PEBS_STLAT,		0x0008000) /* st+stlat data address sampling */
+PERF_ARCH(AMD_BRS,		0x0010000) /* AMD Branch Sampling */
+PERF_ARCH(PEBS_LAT_HYBRID,	0x0020000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK,	0x0040000) /* require branch stack setup */
+PERF_ARCH(BRANCH_COUNTERS,	0x0080000) /* logs the counters in the extra space of each branch */
+PERF_ARCH(ACR,			0x0100000) /* Auto counter reload */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2fa0cd6772f1..aaffe31b78da 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -144,7 +144,7 @@ struct hw_perf_event_extra {
  * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
  * usage.
  */
-#define PERF_EVENT_FLAG_ARCH			0x000fffff
+#define PERF_EVENT_FLAG_ARCH			0x0fffffff
 #define PERF_EVENT_FLAG_USER_READ_CNT		0x80000000

 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
--
2.38.1

From nobody Wed Dec 17 08:54:20 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	ak@linux.intel.com, linux-kernel@vger.kernel.org
Cc: eranian@google.com, thomas.falcon@intel.com, Kan Liang
Subject: [PATCH V4 4/5] perf/x86/intel: Add CPUID enumeration for the auto counter reload
Date: Thu, 27 Mar 2025 12:52:16 -0700
Message-Id: <20250327195217.2683619-5-kan.liang@linux.intel.com>
In-Reply-To: <20250327195217.2683619-1-kan.liang@linux.intel.com>
References: <20250327195217.2683619-1-kan.liang@linux.intel.com>

From: Kan Liang

The counters that support the auto counter reload feature can be
enumerated via CPUID Leaf 0x23 sub-leaf 0x2.

Add acr_cntr_mask to store the mask of counters which are reloadable.
Add acr_cause_mask to store the mask of counters which can cause a
reload. Since the e-core and p-core may have different numbers of
counters, track the masks in struct x86_hybrid_pmu as well.

Tested-by: Thomas Falcon
Signed-off-by: Kan Liang
---
 arch/x86/events/intel/core.c      | 10 ++++++++++
 arch/x86/events/perf_event.h      | 17 +++++++++++++++++
 arch/x86/include/asm/perf_event.h |  1 +
 3 files changed, 28 insertions(+)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 66c42f856636..42cf474ee520 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -5069,6 +5069,16 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
 		pmu->fixed_cntr_mask64 = fixed_cntr;
 	}

+	if (eax.split.acr_subleaf) {
+		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
+			    &cntr, &fixed_cntr, &ecx, &edx);
+		/* The mask of the counters which can be reloaded */
+		pmu->acr_cntr_mask64 = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED);
+
+		/* The mask of the counters which can cause a reload of reloadable counters */
+		pmu->acr_cause_mask64 = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED);
+	}
+
 	if (!intel_pmu_broken_perf_cap()) {
 		/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
 		rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 84943243b05d..b68b653d3a01 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -725,6 +725,15 @@ struct x86_hybrid_pmu {
 		u64		fixed_cntr_mask64;
 		unsigned long	fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	};
+
+	union {
+		u64		acr_cntr_mask64;
+		unsigned long	acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
+	union {
+		u64		acr_cause_mask64;
+		unsigned long	acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
 	struct event_constraint		unconstrained;

 	u64				hw_cache_event_ids
@@ -823,6 +832,14 @@ struct x86_pmu {
 		u64		fixed_cntr_mask64;
 		unsigned long	fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	};
+	union {
+		u64		acr_cntr_mask64;
+		unsigned long	acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
+	union {
+		u64		acr_cause_mask64;
+		unsigned long	acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	};
 	int		cntval_bits;
 	u64		cntval_mask;
 	union {
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 812dac3f79f0..70d1d94aca7e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -195,6 +195,7 @@ union cpuid10_edx {
  */
 #define ARCH_PERFMON_EXT_LEAF			0x00000023
 #define ARCH_PERFMON_NUM_COUNTER_LEAF		0x1
+#define ARCH_PERFMON_ACR_LEAF			0x2

 union cpuid35_eax {
 	struct {
--
2.38.1
From nobody Wed Dec 17 08:54:20 2025
From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	ak@linux.intel.com, linux-kernel@vger.kernel.org
Cc: eranian@google.com, thomas.falcon@intel.com, Kan Liang
Subject: [PATCH V4 5/5] perf/x86/intel: Support auto counter reload
Date: Thu, 27 Mar 2025 12:52:17 -0700
Message-Id: <20250327195217.2683619-6-kan.liang@linux.intel.com>
In-Reply-To: <20250327195217.2683619-1-kan.liang@linux.intel.com>
References: <20250327195217.2683619-1-kan.liang@linux.intel.com>

From: Kan Liang

The relative rates among two or more events are useful for performance
analysis, e.g., a high branch miss rate may indicate a performance
issue. Usually, the samples with a relative rate that exceeds some
threshold are more useful. However, traditional sampling takes samples
of each event separately. To get the relative rates among two or more
events, a high sample rate is required, which brings high overhead.
Many samples taken in the non-hotspot area are also dropped (useless)
in the post-processing stage.

The auto counter reload (ACR) feature takes samples only when the
relative rate of two or more events exceeds some threshold, which
provides fine-grained information at a low cost.

To support the feature, two sets of MSRs are introduced. For a given
counter IA32_PMC_GPn_CTR/IA32_PMC_FXm_CTR, bit fields in the
IA32_PMC_GPn_CFG_B/IA32_PMC_FXm_CFG_B MSR indicate which counter(s) can
cause a reload of that counter. The reload value is stored in
IA32_PMC_GPn_CFG_C/IA32_PMC_FXm_CFG_C. The details can be found in the
Intel SDM (085), Volume 3, 21.9.11 Auto Counter Reload.

In hw_config(), an ACR event is specially configured, because the
cause/reloadable counter mask has to be applied to dyn_constraint.

Besides the HW limits, e.g., no support for perf metrics or PDist, a SW
limit is applied as well: ACR events in a group must be contiguous. It
facilitates the later conversion from the event idx to the counter idx.
Otherwise, intel_pmu_acr_late_setup() would have to traverse the whole
event list again to find the "cause" event.
Also, add a new flag PERF_X86_EVENT_ACR to indicate an ACR group, which
is set on the group leader.

The late setup() is also required for an ACR group. It converts the
event idx to the counter idx and saves it in hw.config1.

The ACR configuration MSRs are only updated in enable_event(). The
disable_event() doesn't clear the ACR CFG register. Add
acr_cfg_b/acr_cfg_c in struct cpu_hw_events to cache the MSR values, so
an MSR write can be avoided when the value is unchanged.

Expose an acr_mask format to sysfs. The perf tool can utilize the new
format to configure the relation of events in the group. The bit
sequence of the acr_mask follows the enabled order of the events in the
group.

Example:

Here is a snippet of mispredict.c. Since the array holds random
numbers, the jumps are random and often mispredicted. The misprediction
rate depends on the compared value. For Loop 1, ~11% of all branches
are mispredicted. For Loop 2, ~21% of all branches are mispredicted.

main()
{
	...
	for (i = 0; i < N; i++)
		data[i] = rand() % 256;
	...
	/* Loop 1 */
	for (k = 0; k < 50; k++)
		for (i = 0; i < N; i++)
			if (data[i] >= 64)
				sum += data[i];
	...

	...
	/* Loop 2 */
	for (k = 0; k < 50; k++)
		for (i = 0; i < N; i++)
			if (data[i] >= 128)
				sum += data[i];
	...
}

Usually, code with a high branch miss rate means bad performance. To
understand the branch miss rate of the code, the traditional method
usually samples both the branches and branch-misses events.
E.g.,

perf record -e "{cpu_atom/branch-misses/ppu,cpu_atom/branch-instructions/u}"
            -c 1000000 -- ./mispredict

[ perf record: Woken up 4 times to write data ]
[ perf record: Captured and wrote 0.925 MB perf.data (5106 samples) ]

The 5106 samples are from both events and spread over both loops. In
the post-processing stage, a user can learn that Loop 2 has a 21%
branch miss rate, and then focus on the samples of the branch-misses
event for Loop 2.

With this patch, the user can generate the samples only when the branch
miss rate is > 20%. For example,

perf record -e "{cpu_atom/branch-misses,period=200000,acr_mask=0x2/ppu,
                 cpu_atom/branch-instructions,period=1000000,acr_mask=0x3/u}"
            -- ./mispredict

(Two different periods are applied to branch-misses and
branch-instructions. The ratio is set to 20%.

If branch-instructions overflows first, the branch miss rate is < 20%.
No samples should be generated. All counters should be automatically
reloaded.

If branch-misses overflows first, the branch miss rate is > 20%. A
sample triggered by the branch-misses event should be generated. Only
the counter of branch-instructions should be automatically reloaded.

The branch-misses event should only be automatically reloaded when
branch-instructions overflows. So the "cause" event is the
branch-instructions event. Its acr_mask is set to 0x2, since the event
index of branch-instructions in the group is 1.

The branch-instructions event is automatically reloaded no matter which
event overflows. So the "cause" events are both the branch-misses and
the branch-instructions events. Its acr_mask should be set to 0x3.)

[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.098 MB perf.data (2498 samples) ]

$ perf report

Percent  │154:   movl   $0x0,-0x14(%rbp)
         │     ↓ jmp    1af
         │       for (i = j; i < N; i++)
         │15d:   mov    -0x10(%rbp),%eax
         │       mov    %eax,-0x18(%rbp)
         │     ↓ jmp    1a2
         │       if (data[i] >= 128)
         │165:   mov    -0x18(%rbp),%eax
         │       cltq
         │       lea    0x0(,%rax,4),%rdx
         │       mov    -0x8(%rbp),%rax
         │       add    %rdx,%rax
         │       mov    (%rax),%eax
         │     ┌──cmp    $0x7f,%eax
100.00 0.00 │  ├──jle    19e
         │  │    sum += data[i];

The 2498 samples are all from the branch-misses event for Loop 2. The
number of samples and the overhead are significantly reduced without
losing any information.
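(Not part of the patch: for readers who want to drive this without the
perf tool, the same two-event ACR group could in principle be opened
directly via perf_event_open(2), with the acr_mask carried in
attr.config2 as exposed by the new "config2:0-63" format below. The
open_acr_event() helper is hypothetical and only a sketch; on a hybrid
machine attr.type would have to be the cpu_atom/cpu_core PMU type read
from sysfs instead of PERF_TYPE_HARDWARE, and the ring-buffer and mmap
setup that perf record normally does is omitted.)

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical helper: open one member of the ACR group. */
static int open_acr_event(__u64 config, __u64 period, __u64 acr_mask,
			  unsigned int precise_ip, int group_fd)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size           = sizeof(attr);
	attr.type           = PERF_TYPE_HARDWARE; /* PMU type from sysfs on hybrid */
	attr.config         = config;
	attr.sample_period  = period;    /* ACR does not support freq mode */
	attr.config2        = acr_mask;  /* which group members reload this counter */
	attr.precise_ip     = precise_ip;
	attr.exclude_kernel = 1;

	return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
}

int main(void)
{
	/*
	 * branch-misses samples only when it overflows before
	 * branch-instructions, i.e. when the miss rate exceeds
	 * 200000 / 1000000 = 20%, matching the example above.
	 */
	int leader = open_acr_event(PERF_COUNT_HW_BRANCH_MISSES, 200000, 0x2, 2, -1);
	int member = open_acr_event(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, 1000000, 0x3, 0, leader);

	/* ... enable the group, run the workload, read the samples ... */
	(void)leader;
	(void)member;
	return 0;
}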
Tested-by: Thomas Falcon Signed-off-by: Kan Liang --- arch/x86/events/core.c | 2 +- arch/x86/events/intel/core.c | 226 ++++++++++++++++++++++++++++++- arch/x86/events/perf_event.h | 10 ++ arch/x86/include/asm/msr-index.h | 4 + include/linux/perf_event.h | 1 + 5 files changed, 240 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e30a7d8a3929..b0ef07d14c83 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -755,7 +755,7 @@ void x86_pmu_enable_all(int added) } } =20 -static inline int is_x86_event(struct perf_event *event) +int is_x86_event(struct perf_event *event) { int i; =20 diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 42cf474ee520..16f8aea33243 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2603,7 +2603,8 @@ static void intel_pmu_del_event(struct perf_event *ev= ent) intel_pmu_lbr_del(event); if (event->attr.precise_ip) intel_pmu_pebs_del(event); - if (is_pebs_counter_event_group(event)) + if (is_pebs_counter_event_group(event) || + is_acr_event_group(event)) this_cpu_ptr(&cpu_hw_events)->n_late_setup--; } =20 @@ -2882,6 +2883,52 @@ static void intel_pmu_enable_fixed(struct perf_event= *event) cpuc->fixed_ctrl_val |=3D bits; } =20 +static void intel_pmu_config_acr(int idx, u64 mask, u32 reload) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + int msr_b, msr_c; + + if (!mask && !cpuc->acr_cfg_b[idx]) + return; + + if (idx < INTEL_PMC_IDX_FIXED) { + msr_b =3D MSR_IA32_PMC_V6_GP0_CFG_B; + msr_c =3D MSR_IA32_PMC_V6_GP0_CFG_C; + } else { + msr_b =3D MSR_IA32_PMC_V6_FX0_CFG_B; + msr_c =3D MSR_IA32_PMC_V6_FX0_CFG_C; + idx -=3D INTEL_PMC_IDX_FIXED; + } + + if (cpuc->acr_cfg_b[idx] !=3D mask) { + wrmsrl(msr_b + x86_pmu.addr_offset(idx, false), mask); + cpuc->acr_cfg_b[idx] =3D mask; + } + /* Only need to update the reload value when there is a valid config valu= e. */ + if (mask && cpuc->acr_cfg_c[idx] !=3D reload) { + wrmsrl(msr_c + x86_pmu.addr_offset(idx, false), reload); + cpuc->acr_cfg_c[idx] =3D reload; + } +} + +static void intel_pmu_enable_acr(struct perf_event *event) +{ + struct hw_perf_event *hwc =3D &event->hw; + + if (!is_acr_event_group(event) || !event->attr.config2) { + /* + * The disable doesn't clear the ACR CFG register. + * Check and clear the ACR CFG register. + */ + intel_pmu_config_acr(hwc->idx, 0, 0); + return; + } + + intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period); +} + +DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr); + static void intel_pmu_enable_event(struct perf_event *event) { u64 enable_mask =3D ARCH_PERFMON_EVENTSEL_ENABLE; @@ -2896,9 +2943,12 @@ static void intel_pmu_enable_event(struct perf_event= *event) if (branch_sample_counters(event)) enable_mask |=3D ARCH_PERFMON_EVENTSEL_BR_CNTR; intel_set_masks(event, idx); + static_call_cond(intel_pmu_enable_acr_event)(event); __x86_pmu_enable_event(hwc, enable_mask); break; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: + static_call_cond(intel_pmu_enable_acr_event)(event); + fallthrough; case INTEL_PMC_IDX_METRIC_BASE ... 
INTEL_PMC_IDX_METRIC_END: intel_pmu_enable_fixed(event); break; @@ -2916,6 +2966,31 @@ static void intel_pmu_enable_event(struct perf_event= *event) } } =20 +static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc) +{ + struct perf_event *event, *leader; + int i, j, idx; + + for (i =3D 0; i < cpuc->n_events; i++) { + leader =3D cpuc->event_list[i]; + if (!is_acr_event_group(leader)) + continue; + + /* The ACR events must be contiguous. */ + for (j =3D i; j < cpuc->n_events; j++) { + event =3D cpuc->event_list[j]; + if (event->group_leader !=3D leader->group_leader) + break; + for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_ID= X_MAX) { + if (WARN_ON_ONCE(i + idx > cpuc->n_events)) + return; + __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1); + } + } + i =3D j - 1; + } +} + void intel_pmu_late_setup(void) { struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); @@ -2924,6 +2999,7 @@ void intel_pmu_late_setup(void) return; =20 intel_pmu_pebs_late_setup(cpuc); + intel_pmu_acr_late_setup(cpuc); } =20 static void intel_pmu_add_event(struct perf_event *event) @@ -2932,7 +3008,8 @@ static void intel_pmu_add_event(struct perf_event *ev= ent) intel_pmu_pebs_add(event); if (intel_pmu_needs_branch_stack(event)) intel_pmu_lbr_add(event); - if (is_pebs_counter_event_group(event)) + if (is_pebs_counter_event_group(event) || + is_acr_event_group(event)) this_cpu_ptr(&cpu_hw_events)->n_late_setup++; } =20 @@ -4087,6 +4164,39 @@ static u64 intel_pmu_freq_start_period(struct perf_e= vent *event) return start; } =20 +static inline bool intel_pmu_has_acr(struct pmu *pmu) +{ + return !!hybrid(pmu, acr_cause_mask64); +} + +static bool intel_pmu_is_acr_group(struct perf_event *event) +{ + /* The group leader has the ACR flag set */ + if (is_acr_event_group(event)) + return true; + + /* The acr_mask is set */ + if (event->attr.config2) + return true; + + return false; +} + +static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event, + u64 *cause_mask, int *num) +{ + event->hw.dyn_constraint &=3D hybrid(event->pmu, acr_cntr_mask64); + *cause_mask |=3D event->attr.config2; + *num +=3D 1; +} + +static inline void intel_pmu_set_acr_caused_constr(struct perf_event *even= t, + int idx, u64 cause_mask) +{ + if (test_bit(idx, (unsigned long *)&cause_mask)) + event->hw.dyn_constraint &=3D hybrid(event->pmu, acr_cause_mask64); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret =3D x86_pmu_hw_config(event); @@ -4215,6 +4325,94 @@ static int intel_pmu_hw_config(struct perf_event *ev= ent) event->attr.precise_ip) event->group_leader->hw.flags |=3D PERF_X86_EVENT_PEBS_CNTR; =20 + if (intel_pmu_has_acr(event->pmu) && intel_pmu_is_acr_group(event)) { + struct perf_event *sibling, *leader =3D event->group_leader; + struct pmu *pmu =3D event->pmu; + bool has_sw_event =3D false; + int num =3D 0, idx =3D 0; + u64 cause_mask =3D 0; + + /* Not support perf metrics */ + if (is_metric_event(event)) + return -EINVAL; + + /* Not support freq mode */ + if (event->attr.freq) + return -EINVAL; + + /* PDist is not supported */ + if (event->attr.config2 && event->attr.precise_ip > 2) + return -EINVAL; + + /* The reload value cannot exceeds the max period */ + if (event->attr.sample_period > x86_pmu.max_period) + return -EINVAL; + /* + * The counter-constraints of each event cannot be finalized + * unless the whole group is scanned. However, it's hard + * to know whether the event is the last one of the group. 
+ * Recalculate the counter-constraints for each event when + * adding a new event. + * + * The group is traversed twice, which may be optimized later. + * In the first round, + * - Find all events which do reload when other events + * overflow and set the corresponding counter-constraints + * - Add all events, which can cause other events reload, + * in the cause_mask + * - Error out if the number of events exceeds the HW limit + * - The ACR events must be contiguous. + * Error out if there are non-X86 events between ACR events. + * This is not a HW limit, but a SW limit. + * With the assumption, the intel_pmu_acr_late_setup() can + * easily convert the event idx to counter idx without + * traversing the whole event list. + */ + if (!is_x86_event(leader)) + return -EINVAL; + + if (leader->attr.config2) + intel_pmu_set_acr_cntr_constr(leader, &cause_mask, &num); + + if (leader->nr_siblings) { + for_each_sibling_event(sibling, leader) { + if (!is_x86_event(sibling)) { + has_sw_event =3D true; + continue; + } + if (!sibling->attr.config2) + continue; + if (has_sw_event) + return -EINVAL; + intel_pmu_set_acr_cntr_constr(sibling, &cause_mask, &num); + } + } + if (leader !=3D event && event->attr.config2) { + if (has_sw_event) + return -EINVAL; + intel_pmu_set_acr_cntr_constr(event, &cause_mask, &num); + } + + if (hweight64(cause_mask) > hweight64(hybrid(pmu, acr_cause_mask64)) || + num > hweight64(hybrid(event->pmu, acr_cntr_mask64))) + return -EINVAL; + /* + * In the second round, apply the counter-constraints for + * the events which can cause other events reload. + */ + intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask); + + if (leader->nr_siblings) { + for_each_sibling_event(sibling, leader) + intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask); + } + + if (leader !=3D event) + intel_pmu_set_acr_caused_constr(event, idx, cause_mask); + + leader->hw.flags |=3D PERF_X86_EVENT_ACR; + } + if ((event->attr.type =3D=3D PERF_TYPE_HARDWARE) || (event->attr.type =3D=3D PERF_TYPE_HW_CACHE)) return 0; @@ -6060,6 +6258,21 @@ td_is_visible(struct kobject *kobj, struct attribute= *attr, int i) return attr->mode; } =20 +PMU_FORMAT_ATTR(acr_mask, "config2:0-63"); + +static struct attribute *format_acr_attrs[] =3D { + &format_attr_acr_mask.attr, + NULL +}; + +static umode_t +acr_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + struct device *dev =3D kobj_to_dev(kobj); + + return intel_pmu_has_acr(dev_get_drvdata(dev)) ? 
attr->mode : 0; +} + static struct attribute_group group_events_td =3D { .name =3D "events", .is_visible =3D td_is_visible, @@ -6102,6 +6315,12 @@ static struct attribute_group group_format_evtsel_ex= t =3D { .is_visible =3D evtsel_ext_is_visible, }; =20 +static struct attribute_group group_format_acr =3D { + .name =3D "format", + .attrs =3D format_acr_attrs, + .is_visible =3D acr_is_visible, +}; + static struct attribute_group group_default =3D { .attrs =3D intel_pmu_attrs, .is_visible =3D default_is_visible, @@ -6116,6 +6335,7 @@ static const struct attribute_group *attr_update[] = =3D { &group_format_extra, &group_format_extra_skl, &group_format_evtsel_ext, + &group_format_acr, &group_default, NULL, }; @@ -6400,6 +6620,7 @@ static const struct attribute_group *hybrid_attr_upda= te[] =3D { &group_caps_lbr, &hybrid_group_format_extra, &group_format_evtsel_ext, + &group_format_acr, &group_default, &hybrid_group_cpus, NULL, @@ -6592,6 +6813,7 @@ static __always_inline void intel_pmu_init_skt(struct= pmu *pmu) intel_pmu_init_grt(pmu); hybrid(pmu, event_constraints) =3D intel_skt_event_constraints; hybrid(pmu, extra_regs) =3D intel_cmt_extra_regs; + static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); } =20 __init int intel_pmu_init(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index b68b653d3a01..902bc42a6cfe 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -120,6 +120,11 @@ static inline bool is_pebs_counter_event_group(struct = perf_event *event) return event->group_leader->hw.flags & PERF_X86_EVENT_PEBS_CNTR; } =20 +static inline bool is_acr_event_group(struct perf_event *event) +{ + return event->group_leader->hw.flags & PERF_X86_EVENT_ACR; +} + struct amd_nb { int nb_id; /* NorthBridge id */ int refcnt; /* reference count */ @@ -287,6 +292,10 @@ struct cpu_hw_events { u64 fixed_ctrl_val; u64 active_fixed_ctrl_val; =20 + /* Intel ACR configuration */ + u64 acr_cfg_b[X86_PMC_IDX_MAX]; + u64 acr_cfg_c[X86_PMC_IDX_MAX]; + /* * Intel LBR bits */ @@ -1120,6 +1129,7 @@ static struct perf_pmu_format_hybrid_attr format_attr= _hybrid_##_name =3D {\ .pmu_type =3D _pmu, \ } =20 +int is_x86_event(struct perf_event *event); struct pmu *x86_get_pmu(unsigned int cpu); extern struct x86_pmu x86_pmu __read_mostly; =20 diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-in= dex.h index 72765b2fe0d8..55774dd73c6a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -592,7 +592,11 @@ /* V6 PMON MSR range */ #define MSR_IA32_PMC_V6_GP0_CTR 0x1900 #define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902 +#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903 #define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982 +#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983 #define MSR_IA32_PMC_V6_STEP 4 =20 /* KeyID partitioning between MKTME and TDX */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index aaffe31b78da..8f9ac5047972 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -157,6 +157,7 @@ struct hw_perf_event { union { struct { /* hardware */ u64 config; + u64 config1; u64 last_tag; u64 dyn_constraint; unsigned long config_base; --=20 2.38.1