Branch stack sampling rides along the normal perf event and all the branch
records get captured during the PMU interrupt. This just changes perf event
handling on the arm64 platform to accommodate required BRBE operations that
will enable branch stack sampling support.
It adds a new 'hw_perf_event.flags' element, i.e. ARMPMU_EVT_PRIV, which
enables caching the perf event privilege information required for capturing
some branch record types.
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
arch/arm64/kernel/perf_event.c | 6 ++++
drivers/perf/arm_pmu.c | 50 ++++++++++++++++++++++++++++++++++
include/linux/perf/arm_pmu.h | 4 +++
3 files changed, 60 insertions(+)
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index e7013699171f..5bfaba8edad1 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -874,6 +874,12 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;
+ if (has_branch_stack(event)) {
+ cpu_pmu->brbe_read(cpuc, event);
+ data.br_stack = &cpuc->brbe_stack;
+ cpu_pmu->brbe_reset(cpuc);
+ }
+
/*
* Perf event overflow will queue the processing of the event as
* an irq_work which will be taken care of in the handling of
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 59d3980b8ca2..1fe5d6238b81 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -271,12 +271,22 @@ armpmu_stop(struct perf_event *event, int flags)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
/*
* ARM pmu always has to update the counter, so ignore
* PERF_EF_UPDATE, see comments in armpmu_start().
*/
if (!(hwc->state & PERF_HES_STOPPED)) {
+ if (has_branch_stack(event)) {
+ WARN_ON_ONCE(!hw_events->brbe_users);
+ hw_events->brbe_users--;
+ if (!hw_events->brbe_users) {
+ hw_events->brbe_context = NULL;
+ armpmu->brbe_disable(hw_events);
+ }
+ }
+
armpmu->disable(event);
armpmu_event_update(event);
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
@@ -287,6 +297,7 @@ static void armpmu_start(struct perf_event *event, int flags)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
/*
* ARM pmu always has to reprogram the period, so ignore
@@ -304,6 +315,14 @@ static void armpmu_start(struct perf_event *event, int flags)
* happened since disabling.
*/
armpmu_event_set_period(event);
+ if (has_branch_stack(event)) {
+ if (event->ctx->task && hw_events->brbe_context != event->ctx) {
+ armpmu->brbe_reset(hw_events);
+ hw_events->brbe_context = event->ctx;
+ }
+ armpmu->brbe_enable(hw_events);
+ hw_events->brbe_users++;
+ }
armpmu->enable(event);
}
@@ -349,6 +368,10 @@ armpmu_add(struct perf_event *event, int flags)
hw_events->events[idx] = event;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+ if (has_branch_stack(event))
+ armpmu->brbe_filter(hw_events, event);
+
if (flags & PERF_EF_START)
armpmu_start(event, PERF_EF_RELOAD);
@@ -443,6 +466,7 @@ __hw_perf_event_init(struct perf_event *event)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
int mapping;
hwc->flags = 0;
@@ -492,6 +516,19 @@ __hw_perf_event_init(struct perf_event *event)
local64_set(&hwc->period_left, hwc->sample_period);
}
+ if (has_branch_stack(event)) {
+ /*
+ * Cache whether the perf event is allowed to capture exception
+ * and exception return branch records. It allows us to perform
+ * the privilege check via perfmon_capable(), in the context of
+ * the event owner, just once, during the pmu->event_init().
+ */
+ if (perfmon_capable())
+ event->hw.flags |= ARMPMU_EVT_PRIV;
+
+ armpmu->brbe_filter(hw_events, event);
+ }
+
return validate_group(event);
}
@@ -520,6 +557,18 @@ static int armpmu_event_init(struct perf_event *event)
return __hw_perf_event_init(event);
}
+static void armpmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ struct arm_pmu *armpmu = to_arm_pmu(ctx->pmu);
+ struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+
+ if (!hw_events->brbe_users)
+ return;
+
+ if (sched_in)
+ armpmu->brbe_reset(hw_events);
+}
+
static void armpmu_enable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
@@ -877,6 +926,7 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
}
pmu->pmu = (struct pmu) {
+ .sched_task = armpmu_sched_task,
.pmu_enable = armpmu_enable,
.pmu_disable = armpmu_disable,
.event_init = armpmu_event_init,
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 18e519e4e658..67f44020a736 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -29,6 +29,10 @@
/* Event uses a 47bit counter */
#define ARMPMU_EVT_47BIT 2
+#define ARMPMU_EVT_PRIV 0x00004 /* Event is privileged */
+
+static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_PRIV) == ARMPMU_EVT_PRIV);
+
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED 0xFFFF
--
2.25.1
Hi Anshuman,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on acme/perf/core]
[also build test WARNING on tip/perf/core arm64/for-next/core linus/master v6.0-rc4 next-20220908]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/arm64-perf-Enable-branch-stack-sampling/20220908-131425
base: https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git perf/core
config: arm64-randconfig-r025-20220907 (https://download.01.org/0day-ci/archive/20220908/202209082350.lDY2EvGx-lkp@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 1546df49f5a6d09df78f569e4137ddb365a3e827)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm64 cross compiling tool for clang build
# apt-get install binutils-aarch64-linux-gnu
# https://github.com/intel-lab-lkp/linux/commit/5c7c07e050abb38b80d0c129fdef3a6f4b761017
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Anshuman-Khandual/arm64-perf-Enable-branch-stack-sampling/20220908-131425
git checkout 5c7c07e050abb38b80d0c129fdef3a6f4b761017
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 SHELL=/bin/bash drivers/perf/
If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
>> drivers/perf/arm_pmu.c:535:12: warning: stack frame size (2064) exceeds limit (2048) in 'armpmu_event_init' [-Wframe-larger-than]
static int armpmu_event_init(struct perf_event *event)
^
1 warning generated.
vim +/armpmu_event_init +535 drivers/perf/arm_pmu.c
1b8873a0c6ec51 arch/arm/kernel/perf_event.c Jamie Iles 2010-02-02 534
b0a873ebbf87bf arch/arm/kernel/perf_event.c Peter Zijlstra 2010-06-11 @535 static int armpmu_event_init(struct perf_event *event)
1b8873a0c6ec51 arch/arm/kernel/perf_event.c Jamie Iles 2010-02-02 536 {
8a16b34e21199e arch/arm/kernel/perf_event.c Mark Rutland 2011-04-28 537 struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
1b8873a0c6ec51 arch/arm/kernel/perf_event.c Jamie Iles 2010-02-02 538
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 539 /*
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 540 * Reject CPU-affine events for CPUs that are of a different class to
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 541 * that which this PMU handles. Process-following events (where
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 542 * event->cpu == -1) can be migrated between CPUs, and thus we have to
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 543 * reject them later (in armpmu_add) if they're scheduled on a
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 544 * different class of CPU.
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 545 */
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 546 if (event->cpu != -1 &&
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 547 !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 548 return -ENOENT;
cc88116da0d18b arch/arm/kernel/perf_event.c Mark Rutland 2015-05-13 549
2481c5fa6db023 arch/arm/kernel/perf_event.c Stephane Eranian 2012-02-09 550 /* does not support taken branch sampling */
2481c5fa6db023 arch/arm/kernel/perf_event.c Stephane Eranian 2012-02-09 551 if (has_branch_stack(event))
2481c5fa6db023 arch/arm/kernel/perf_event.c Stephane Eranian 2012-02-09 552 return -EOPNOTSUPP;
2481c5fa6db023 arch/arm/kernel/perf_event.c Stephane Eranian 2012-02-09 553
e1f431b57ef9e4 arch/arm/kernel/perf_event.c Mark Rutland 2011-04-28 554 if (armpmu->map_event(event) == -ENOENT)
b0a873ebbf87bf arch/arm/kernel/perf_event.c Peter Zijlstra 2010-06-11 555 return -ENOENT;
b0a873ebbf87bf arch/arm/kernel/perf_event.c Peter Zijlstra 2010-06-11 556
c09adab01e4aee drivers/perf/arm_pmu.c Mark Rutland 2017-03-10 557 return __hw_perf_event_init(event);
1b8873a0c6ec51 arch/arm/kernel/perf_event.c Jamie Iles 2010-02-02 558 }
1b8873a0c6ec51 arch/arm/kernel/perf_event.c Jamie Iles 2010-02-02 559
--
0-DAY CI Kernel Test Service
https://01.org/lkp
© 2016 - 2026 Red Hat, Inc.