[RFC PATCH 1/4] perf: arm_pmuv3: Introduce module param to partition the PMU
Posted by Colton Lewis 1 year ago
For PMUv3, the register field MDCR_EL2.HPMN partitions the PMU counters
into two ranges: counters 0..HPMN-1 are accessible by EL1 and, if
allowed, EL0, while counters HPMN..N-1 are only accessible by EL2.
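
For example, on a CPU whose PMCR_EL0.N reads 10, setting HPMN = 4 leaves
counters 0..3 usable from EL1 (and EL0 if allowed) while counters 4..9
become EL2-only.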

Introduce a module parameter in the PMUv3 driver to set this
register. The name reserved_guest_counters reflects the intent to
reserve some counters for the guest so they may eventually be allowed
direct access to a subset of PMU functionality for increased
performance.
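
(Since the PMUv3 driver is built in, the parameter would be set on the
kernel command line, e.g. arm_pmuv3.reserved_guest_counters=4; the
arm_pmuv3. prefix is an assumption based on the usual KBUILD_MODNAME
naming.)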

Track HPMN and whether the PMU is partitioned in struct arm_pmu.

While FEAT_HPMN0 does allow HPMN to be set to 0, this patch
specifically disallows that case because it's not useful given the
intention to allow guests access to their own counters.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
---
 arch/arm/include/asm/arm_pmuv3.h   | 10 +++++++
 arch/arm64/include/asm/arm_pmuv3.h | 10 +++++++
 drivers/perf/arm_pmuv3.c           | 43 ++++++++++++++++++++++++++++--
 include/linux/perf/arm_pmu.h       |  2 ++
 include/linux/perf/arm_pmuv3.h     |  7 +++++
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index 2ec0e5e83fc9..49ad90486aa5 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -277,4 +277,14 @@ static inline u64 read_pmceid1(void)
 	return val;
 }
 
+static inline u32 read_mdcr(void)
+{
+	return read_sysreg(mdcr_el2);
+}
+
+static inline void write_mdcr(u32 val)
+{
+	write_sysreg(val, mdcr_el2);
+}
+
 #endif
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
index 8a777dec8d88..fc37e7e81e07 100644
--- a/arch/arm64/include/asm/arm_pmuv3.h
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -188,4 +188,14 @@ static inline bool is_pmuv3p9(int pmuver)
 	return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9;
 }
 
+static inline u64 read_mdcr(void)
+{
+	return read_sysreg(mdcr_el2);
+}
+
+static inline void write_mdcr(u64 val)
+{
+	write_sysreg(val, mdcr_el2);
+}
+
 #endif
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index b5cc11abc962..55f9ae560715 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -325,6 +325,7 @@ GEN_PMU_FORMAT_ATTR(threshold_compare);
 GEN_PMU_FORMAT_ATTR(threshold);
 
 static int sysctl_perf_user_access __read_mostly;
+static u8 reserved_guest_counters __read_mostly;
 
 static bool armv8pmu_event_is_64bit(struct perf_event *event)
 {
@@ -500,6 +501,29 @@ static void armv8pmu_pmcr_write(u64 val)
 	write_pmcr(val);
 }
 
+static u64 armv8pmu_mdcr_read(void)
+{
+	return read_mdcr();
+}
+
+static void armv8pmu_mdcr_write(u64 val)
+{
+	write_mdcr(val);
+	isb();
+}
+
+static void armv8pmu_partition(u8 hpmn)
+{
+	u64 mdcr = armv8pmu_mdcr_read();
+
+	mdcr &= ~MDCR_EL2_HPMN_MASK;
+	mdcr |= FIELD_PREP(ARMV8_PMU_MDCR_HPMN, hpmn);
+	/* Prevent guest counters counting at EL2 */
+	mdcr |= ARMV8_PMU_MDCR_HPMD;
+
+	armv8pmu_mdcr_write(mdcr);
+}
+
 static int armv8pmu_has_overflowed(u64 pmovsr)
 {
 	return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK);
@@ -1069,6 +1093,9 @@ static void armv8pmu_reset(void *info)
 
 	bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
 
+	if (cpu_pmu->partitioned)
+		armv8pmu_partition(cpu_pmu->hpmn);
+
 	/* The counter and interrupt enable registers are unknown at reset. */
 	armv8pmu_disable_counter(mask);
 	armv8pmu_disable_intens(mask);
@@ -1205,6 +1232,7 @@ static void __armv8pmu_probe_pmu(void *info)
 {
 	struct armv8pmu_probe_info *probe = info;
 	struct arm_pmu *cpu_pmu = probe->pmu;
+	u8 pmcr_n;
 	u64 pmceid_raw[2];
 	u32 pmceid[2];
 	int pmuver;
@@ -1215,10 +1243,19 @@ static void __armv8pmu_probe_pmu(void *info)
 
 	cpu_pmu->pmuver = pmuver;
 	probe->present = true;
+	pmcr_n = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
 
 	/* Read the nb of CNTx counters supported from PMNC */
-	bitmap_set(cpu_pmu->cntr_mask,
-		   0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
+	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
+
+	if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
+		cpu_pmu->hpmn = reserved_guest_counters;
+		cpu_pmu->partitioned = true;
+	} else {
+		reserved_guest_counters = 0;
+		cpu_pmu->hpmn = pmcr_n;
+		cpu_pmu->partitioned = false;
+	}
 
 	/* Add the CPU cycles counter */
 	set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask);
@@ -1516,3 +1553,5 @@ void arch_perf_update_userpage(struct perf_event *event,
 	userpg->cap_user_time_zero = 1;
 	userpg->cap_user_time_short = 1;
 }
+
+module_param(reserved_guest_counters, byte, 0);
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 4b5b83677e3f..ad97aabed25a 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -101,6 +101,8 @@ struct arm_pmu {
 	void		(*reset)(void *);
 	int		(*map_event)(struct perf_event *event);
 	DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS);
+	u8		hpmn; /* MDCR_EL2.HPMN: counter partition pivot */
+	bool	        partitioned;
 	bool		secure_access; /* 32-bit ARM only */
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS		0x40
 	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index d698efba28a2..d399e8c6f98e 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -223,6 +223,13 @@
 				 ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \
 				 ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)
 
+/*
+ * Per-CPU MDCR: config reg
+ */
+#define ARMV8_PMU_MDCR_HPMN		GENMASK(4, 0)
+#define ARMV8_PMU_MDCR_HPME		BIT(7)
+#define ARMV8_PMU_MDCR_HPMD		BIT(17)
+
 /*
  * PMOVSR: counters overflow flag status reg
  */
-- 
2.48.1.262.g85cc9f2d1e-goog
Re: [RFC PATCH 1/4] perf: arm_pmuv3: Introduce module param to partition the PMU
Posted by Rob Herring 1 year ago
On Mon, Jan 27, 2025 at 4:26 PM Colton Lewis <coltonlewis@google.com> wrote:
>
> For PMUv3, the register field MDCR_EL2.HPMN partitions the PMU counters
> into two ranges: counters 0..HPMN-1 are accessible by EL1 and, if
> allowed, EL0, while counters HPMN..N-1 are only accessible by EL2.
>
> Introduce a module parameter in the PMUv3 driver to set this
> register. The name reserved_guest_counters reflects the intent to
> reserve some counters for the guest so they may eventually be allowed
> direct access to a subset of PMU functionality for increased
> performance.
>
> Track HPMN and whether the PMU is partitioned in struct arm_pmu.
>
> While FEAT_HPMN0 does allow HPMN to be set to 0, this patch
> specifically disallows that case because it's not useful given the
> intention to allow guests access to their own counters.
>
> Signed-off-by: Colton Lewis <coltonlewis@google.com>
> ---
>  arch/arm/include/asm/arm_pmuv3.h   | 10 +++++++
>  arch/arm64/include/asm/arm_pmuv3.h | 10 +++++++
>  drivers/perf/arm_pmuv3.c           | 43 ++++++++++++++++++++++++++++--
>  include/linux/perf/arm_pmu.h       |  2 ++
>  include/linux/perf/arm_pmuv3.h     |  7 +++++
>  5 files changed, 70 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
> index 2ec0e5e83fc9..49ad90486aa5 100644
> --- a/arch/arm/include/asm/arm_pmuv3.h
> +++ b/arch/arm/include/asm/arm_pmuv3.h
> @@ -277,4 +277,14 @@ static inline u64 read_pmceid1(void)
>         return val;
>  }
>
> +static inline u32 read_mdcr(void)
> +{
> +       return read_sysreg(mdcr_el2);
> +}
> +
> +static inline void write_mdcr(u32 val)
> +{
> +       write_sysreg(val, mdcr_el2);
> +}
> +
>  #endif
> diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
> index 8a777dec8d88..fc37e7e81e07 100644
> --- a/arch/arm64/include/asm/arm_pmuv3.h
> +++ b/arch/arm64/include/asm/arm_pmuv3.h
> @@ -188,4 +188,14 @@ static inline bool is_pmuv3p9(int pmuver)
>         return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9;
>  }
>
> +static inline u64 read_mdcr(void)
> +{
> +       return read_sysreg(mdcr_el2);
> +}
> +
> +static inline void write_mdcr(u64 val)
> +{
> +       write_sysreg(val, mdcr_el2);
> +}
> +
>  #endif
> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
> index b5cc11abc962..55f9ae560715 100644
> --- a/drivers/perf/arm_pmuv3.c
> +++ b/drivers/perf/arm_pmuv3.c
> @@ -325,6 +325,7 @@ GEN_PMU_FORMAT_ATTR(threshold_compare);
>  GEN_PMU_FORMAT_ATTR(threshold);
>
>  static int sysctl_perf_user_access __read_mostly;
> +static u8 reserved_guest_counters __read_mostly;
>
>  static bool armv8pmu_event_is_64bit(struct perf_event *event)
>  {
> @@ -500,6 +501,29 @@ static void armv8pmu_pmcr_write(u64 val)
>         write_pmcr(val);
>  }
>
> +static u64 armv8pmu_mdcr_read(void)
> +{
> +       return read_mdcr();
> +}
> +
> +static void armv8pmu_mdcr_write(u64 val)
> +{
> +       write_mdcr(val);
> +       isb();
> +}
> +
> +static void armv8pmu_partition(u8 hpmn)
> +{
> +       u64 mdcr = armv8pmu_mdcr_read();
> +
> +       mdcr &= ~MDCR_EL2_HPMN_MASK;
> +       mdcr |= FIELD_PREP(ARMV8_PMU_MDCR_HPMN, hpmn);
> +       /* Prevent guest counters counting at EL2 */
> +       mdcr |= ARMV8_PMU_MDCR_HPMD;
> +
> +       armv8pmu_mdcr_write(mdcr);
> +}
> +
>  static int armv8pmu_has_overflowed(u64 pmovsr)
>  {
>         return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK);
> @@ -1069,6 +1093,9 @@ static void armv8pmu_reset(void *info)
>
>         bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
>
> +       if (cpu_pmu->partitioned)
> +               armv8pmu_partition(cpu_pmu->hpmn);
> +
>         /* The counter and interrupt enable registers are unknown at reset. */
>         armv8pmu_disable_counter(mask);
>         armv8pmu_disable_intens(mask);
> @@ -1205,6 +1232,7 @@ static void __armv8pmu_probe_pmu(void *info)
>  {
>         struct armv8pmu_probe_info *probe = info;
>         struct arm_pmu *cpu_pmu = probe->pmu;
> +       u8 pmcr_n;
>         u64 pmceid_raw[2];
>         u32 pmceid[2];
>         int pmuver;
> @@ -1215,10 +1243,19 @@ static void __armv8pmu_probe_pmu(void *info)
>
>         cpu_pmu->pmuver = pmuver;
>         probe->present = true;
> +       pmcr_n = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
>
>         /* Read the nb of CNTx counters supported from PMNC */
> -       bitmap_set(cpu_pmu->cntr_mask,
> -                  0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
> +       bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
> +
> +       if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
> +               cpu_pmu->hpmn = reserved_guest_counters;
> +               cpu_pmu->partitioned = true;

You're storing the same information 3 times. 'partitioned' is just
'reserved_guest_counters != 0' or 'cpu_pmu->hpmn != pmcr_n'.
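
Something like this sketch would capture that (hypothetical; the helper
name is made up and is not part of the patch):

	/* Derive the partition state instead of storing it separately */
	static inline bool armv8pmu_is_partitioned(void)
	{
		return reserved_guest_counters != 0;
	}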

> +       } else {
> +               reserved_guest_counters = 0;
> +               cpu_pmu->hpmn = pmcr_n;
> +               cpu_pmu->partitioned = false;
> +       }
>
>         /* Add the CPU cycles counter */
>         set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask);
> @@ -1516,3 +1553,5 @@ void arch_perf_update_userpage(struct perf_event *event,
>         userpg->cap_user_time_zero = 1;
>         userpg->cap_user_time_short = 1;
>  }
> +
> +module_param(reserved_guest_counters, byte, 0);

Module params are generally discouraged. Since this driver can't be a
module, this is a boot time only option. There's little reason this
can't be a sysfs setting. There's some complexity in changing this
when counters are in use (just reject the change) and when we have
asymmetric PMUs. Alternatively, it could be a sysctl like user access.
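
For illustration, the sysctl route might look roughly like this, modeled
on the existing perf_user_access entry (the names, bounds, and handler
choice here are assumptions, not part of this series):

	static int max_guest_counters = 31; /* HPMN is a 5-bit field */

	static struct ctl_table armv8_pmu_partition_sysctl_table[] = {
		{
			.procname	= "reserved_guest_counters",
			.data		= &reserved_guest_counters,
			.maxlen		= sizeof(u8),
			.mode		= 0644,
			/*
			 * A custom handler could instead reject
			 * writes while counters are in use.
			 */
			.proc_handler	= proc_dou8vec_minmax,
			.extra1		= SYSCTL_ZERO,
			.extra2		= &max_guest_counters,
		},
	};

registered next to perf_user_access with register_sysctl("kernel", ...).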

Rob
Re: [RFC PATCH 1/4] perf: arm_pmuv3: Introduce module param to partition the PMU
Posted by Colton Lewis 1 year ago
Hey Rob, thanks for the review.

Rob Herring <robh@kernel.org> writes:

> On Mon, Jan 27, 2025 at 4:26 PM Colton Lewis <coltonlewis@google.com>  
> wrote:
>> @@ -1215,10 +1243,19 @@ static void __armv8pmu_probe_pmu(void *info)

>>          cpu_pmu->pmuver = pmuver;
>>          probe->present = true;
>> +       pmcr_n = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());

>>          /* Read the nb of CNTx counters supported from PMNC */
>> -       bitmap_set(cpu_pmu->cntr_mask,
>> -                  0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
>> +       bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
>> +
>> +       if (reserved_guest_counters > 0 && reserved_guest_counters <  
>> pmcr_n) {
>> +               cpu_pmu->hpmn = reserved_guest_counters;
>> +               cpu_pmu->partitioned = true;

> You're storing the same information 3 times. 'partitioned' is just
> 'reserved_guest_counters != 0' or 'cpu_pmu->hpmn != pmcr_n'.

Yes, because code outside this module that isn't already reading PMCR.N
will need to know the result of that comparison.

>> +       } else {
>> +               reserved_guest_counters = 0;
>> +               cpu_pmu->hpmn = pmcr_n;
>> +               cpu_pmu->partitioned = false;
>> +       }

>>          /* Add the CPU cycles counter */
>>          set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask);
>> @@ -1516,3 +1553,5 @@ void arch_perf_update_userpage(struct perf_event  
>> *event,
>>          userpg->cap_user_time_zero = 1;
>>          userpg->cap_user_time_short = 1;
>>   }
>> +
>> +module_param(reserved_guest_counters, byte, 0);

> Module params are generally discouraged. Since this driver can't be a
> module, this is a boot time only option. There's little reason this
> can't be a sysfs setting. There's some complexity in changing this
> when counters are in use (just reject the change) and when we have
> asymmetric PMUs. Alternatively, it could be a sysctl like user access.

I see what you mean. I'm not dealing with those complexities yet. That's
why this is an RFC.

> Rob
Re: [RFC PATCH 1/4] perf: arm_pmuv3: Introduce module param to partition the PMU
Posted by Marc Zyngier 1 year ago
On Mon, 27 Jan 2025 22:20:27 +0000,
Colton Lewis <coltonlewis@google.com> wrote:
> 
> For PMUv3, the register field MDCR_EL2.HPMN partitions the PMU counters
> into two ranges: counters 0..HPMN-1 are accessible by EL1 and, if
> allowed, EL0, while counters HPMN..N-1 are only accessible by EL2.
> 
> Introduce a module parameter in the PMUv3 driver to set this
> register. The name reserved_guest_counters reflects the intent to
> reserve some counters for the guest so they may eventually be allowed
> direct access to a subset of PMU functionality for increased
> performance.
> 
> Track HPMN and whether the PMU is partitioned in struct arm_pmu.
> 
> While FEAT_HPMN0 does allow HPMN to be set to 0, this patch
> specifically disallows that case because it's not useful given the
> intention to allow guests access to their own counters.
> 
> Signed-off-by: Colton Lewis <coltonlewis@google.com>
> ---
>  arch/arm/include/asm/arm_pmuv3.h   | 10 +++++++
>  arch/arm64/include/asm/arm_pmuv3.h | 10 +++++++
>  drivers/perf/arm_pmuv3.c           | 43 ++++++++++++++++++++++++++++--
>  include/linux/perf/arm_pmu.h       |  2 ++
>  include/linux/perf/arm_pmuv3.h     |  7 +++++
>  5 files changed, 70 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
> index 2ec0e5e83fc9..49ad90486aa5 100644
> --- a/arch/arm/include/asm/arm_pmuv3.h
> +++ b/arch/arm/include/asm/arm_pmuv3.h
> @@ -277,4 +277,14 @@ static inline u64 read_pmceid1(void)
>  	return val;
>  }
>  
> +static inline u32 read_mdcr(void)
> +{
> +	return read_sysreg(mdcr_el2);
> +}
> +
> +static inline void write_mdcr(u32 val)
> +{
> +	write_sysreg(val, mdcr_el2);
> +}
> +

This will obviously break the 32bit build.

>  #endif
> diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
> index 8a777dec8d88..fc37e7e81e07 100644
> --- a/arch/arm64/include/asm/arm_pmuv3.h
> +++ b/arch/arm64/include/asm/arm_pmuv3.h
> @@ -188,4 +188,14 @@ static inline bool is_pmuv3p9(int pmuver)
>  	return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9;
>  }
>  
> +static inline u64 read_mdcr(void)
> +{
> +	return read_sysreg(mdcr_el2);
> +}
> +
> +static inline void write_mdcr(u64 val)
> +{
> +	write_sysreg(val, mdcr_el2);
> +}
> +
>  #endif
> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
> index b5cc11abc962..55f9ae560715 100644
> --- a/drivers/perf/arm_pmuv3.c
> +++ b/drivers/perf/arm_pmuv3.c
> @@ -325,6 +325,7 @@ GEN_PMU_FORMAT_ATTR(threshold_compare);
>  GEN_PMU_FORMAT_ATTR(threshold);
>  
>  static int sysctl_perf_user_access __read_mostly;
> +static u8 reserved_guest_counters __read_mostly;
>  
>  static bool armv8pmu_event_is_64bit(struct perf_event *event)
>  {
> @@ -500,6 +501,29 @@ static void armv8pmu_pmcr_write(u64 val)
>  	write_pmcr(val);
>  }
>  
> +static u64 armv8pmu_mdcr_read(void)
> +{
> +	return read_mdcr();
> +}
> +
> +static void armv8pmu_mdcr_write(u64 val)
> +{
> +	write_mdcr(val);
> +	isb();
> +}
> +
> +static void armv8pmu_partition(u8 hpmn)
> +{
> +	u64 mdcr = armv8pmu_mdcr_read();
> +
> +	mdcr &= ~MDCR_EL2_HPMN_MASK;
> +	mdcr |= FIELD_PREP(ARMV8_PMU_MDCR_HPMN, hpmn);
> +	/* Prevent guest counters counting at EL2 */
> +	mdcr |= ARMV8_PMU_MDCR_HPMD;
> +
> +	armv8pmu_mdcr_write(mdcr);
> +}
> +
>  static int armv8pmu_has_overflowed(u64 pmovsr)
>  {
>  	return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK);
> @@ -1069,6 +1093,9 @@ static void armv8pmu_reset(void *info)
>  
>  	bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
>  
> +	if (cpu_pmu->partitioned)
> +		armv8pmu_partition(cpu_pmu->hpmn);
> +

Kaboom, see below.

>  	/* The counter and interrupt enable registers are unknown at reset. */
>  	armv8pmu_disable_counter(mask);
>  	armv8pmu_disable_intens(mask);
> @@ -1205,6 +1232,7 @@ static void __armv8pmu_probe_pmu(void *info)
>  {
>  	struct armv8pmu_probe_info *probe = info;
>  	struct arm_pmu *cpu_pmu = probe->pmu;
> +	u8 pmcr_n;
>  	u64 pmceid_raw[2];
>  	u32 pmceid[2];
>  	int pmuver;
> @@ -1215,10 +1243,19 @@ static void __armv8pmu_probe_pmu(void *info)
>  
>  	cpu_pmu->pmuver = pmuver;
>  	probe->present = true;
> +	pmcr_n = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
>  
>  	/* Read the nb of CNTx counters supported from PMNC */
> -	bitmap_set(cpu_pmu->cntr_mask,
> -		   0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
> +	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
> +
> +	if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
> +		cpu_pmu->hpmn = reserved_guest_counters;
> +		cpu_pmu->partitioned = true;

Isn't this going to completely explode on a kernel running at EL1?

Also, how does it work in an asymmetric configuration where some CPUs
can satisfy the reservation, and some can't?

> +	} else {
> +		reserved_guest_counters = 0;
> +		cpu_pmu->hpmn = pmcr_n;
> +		cpu_pmu->partitioned = false;
> +	}
>  
>  	/* Add the CPU cycles counter */
>  	set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask);
> @@ -1516,3 +1553,5 @@ void arch_perf_update_userpage(struct perf_event *event,
>  	userpg->cap_user_time_zero = 1;
>  	userpg->cap_user_time_short = 1;
>  }
> +
> +module_param(reserved_guest_counters, byte, 0);
> diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
> index 4b5b83677e3f..ad97aabed25a 100644
> --- a/include/linux/perf/arm_pmu.h
> +++ b/include/linux/perf/arm_pmu.h
> @@ -101,6 +101,8 @@ struct arm_pmu {
>  	void		(*reset)(void *);
>  	int		(*map_event)(struct perf_event *event);
>  	DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS);
> +	u8		hpmn; /* MDCR_EL2.HPMN: counter partition pivot */
> +	bool	        partitioned;
>  	bool		secure_access; /* 32-bit ARM only */
>  #define ARMV8_PMUV3_MAX_COMMON_EVENTS		0x40
>  	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
> diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
> index d698efba28a2..d399e8c6f98e 100644
> --- a/include/linux/perf/arm_pmuv3.h
> +++ b/include/linux/perf/arm_pmuv3.h
> @@ -223,6 +223,13 @@
>  				 ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \
>  				 ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)
>  
> +/*
> + * Per-CPU MDCR: config reg
> + */
> +#define ARMV8_PMU_MDCR_HPMN		GENMASK(4, 0)
> +#define ARMV8_PMU_MDCR_HPME		BIT(7)
> +#define ARMV8_PMU_MDCR_HPMD		BIT(17)
> +

I'd rather we find a way to use the existing definitions from the
sysreg file rather than add more duplication.

I also don't see how the kernel knows not to access the low-numbered
counters at this point. Maybe in a later patch, I'll keep reading.

	M.

-- 
Without deviation from the norm, progress is not possible.
Re: [RFC PATCH 1/4] perf: arm_pmuv3: Introduce module param to partition the PMU
Posted by Colton Lewis 1 year ago
Hey Marc, thanks for looking.

Marc Zyngier <maz@kernel.org> writes:

> On Mon, 27 Jan 2025 22:20:27 +0000,
> Colton Lewis <coltonlewis@google.com> wrote:

>> For PMUv3, the register field MDCR_EL2.HPMN partitions the PMU counters
>> into two ranges: counters 0..HPMN-1 are accessible by EL1 and, if
>> allowed, EL0, while counters HPMN..N-1 are only accessible by EL2.

>> Introduce a module parameter in the PMUv3 driver to set this
>> register. The name reserved_guest_counters reflects the intent to
>> reserve some counters for the guest so they may eventually be allowed
>> direct access to a subset of PMU functionality for increased
>> performance.

>> Track HPMN and whether the PMU is partitioned in struct arm_pmu.

>> While FEAT_HPMN0 does allow HPMN to be set to 0, this patch
>> specifically disallows that case because it's not useful given the
>> intention to allow guests access to their own counters.

>> Signed-off-by: Colton Lewis <coltonlewis@google.com>
>> ---
>>   arch/arm/include/asm/arm_pmuv3.h   | 10 +++++++
>>   arch/arm64/include/asm/arm_pmuv3.h | 10 +++++++
>>   drivers/perf/arm_pmuv3.c           | 43 ++++++++++++++++++++++++++++--
>>   include/linux/perf/arm_pmu.h       |  2 ++
>>   include/linux/perf/arm_pmuv3.h     |  7 +++++
>>   5 files changed, 70 insertions(+), 2 deletions(-)

>> diff --git a/arch/arm/include/asm/arm_pmuv3.h  
>> b/arch/arm/include/asm/arm_pmuv3.h
>> index 2ec0e5e83fc9..49ad90486aa5 100644
>> --- a/arch/arm/include/asm/arm_pmuv3.h
>> +++ b/arch/arm/include/asm/arm_pmuv3.h
>> @@ -277,4 +277,14 @@ static inline u64 read_pmceid1(void)
>>   	return val;
>>   }

>> +static inline u32 read_mdcr(void)
>> +{
>> +	return read_sysreg(mdcr_el2);
>> +}
>> +
>> +static inline void write_mdcr(u32 val)
>> +{
>> +	write_sysreg(val, mdcr_el2);
>> +}
>> +

> This will obviously break the 32bit build.

Dang! I did compile for 32-bit arm, but I used defconfig, which I now
realize doesn't enable CONFIG_ARM_PMUV3 even though the 64-bit arm
defconfig does.

>>   #endif
>> diff --git a/arch/arm64/include/asm/arm_pmuv3.h  
>> b/arch/arm64/include/asm/arm_pmuv3.h
>> index 8a777dec8d88..fc37e7e81e07 100644
>> --- a/arch/arm64/include/asm/arm_pmuv3.h
>> +++ b/arch/arm64/include/asm/arm_pmuv3.h
>> @@ -188,4 +188,14 @@ static inline bool is_pmuv3p9(int pmuver)
>>   	return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9;
>>   }

>> +static inline u64 read_mdcr(void)
>> +{
>> +	return read_sysreg(mdcr_el2);
>> +}
>> +
>> +static inline void write_mdcr(u64 val)
>> +{
>> +	write_sysreg(val, mdcr_el2);
>> +}
>> +
>>   #endif
>> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
>> index b5cc11abc962..55f9ae560715 100644
>> --- a/drivers/perf/arm_pmuv3.c
>> +++ b/drivers/perf/arm_pmuv3.c
>> @@ -325,6 +325,7 @@ GEN_PMU_FORMAT_ATTR(threshold_compare);
>>   GEN_PMU_FORMAT_ATTR(threshold);

>>   static int sysctl_perf_user_access __read_mostly;
>> +static u8 reserved_guest_counters __read_mostly;

>>   static bool armv8pmu_event_is_64bit(struct perf_event *event)
>>   {
>> @@ -500,6 +501,29 @@ static void armv8pmu_pmcr_write(u64 val)
>>   	write_pmcr(val);
>>   }

>> +static u64 armv8pmu_mdcr_read(void)
>> +{
>> +	return read_mdcr();
>> +}
>> +
>> +static void armv8pmu_mdcr_write(u64 val)
>> +{
>> +	write_mdcr(val);
>> +	isb();
>> +}
>> +
>> +static void armv8pmu_partition(u8 hpmn)
>> +{
>> +	u64 mdcr = armv8pmu_mdcr_read();
>> +
>> +	mdcr &= ~MDCR_EL2_HPMN_MASK;
>> +	mdcr |= FIELD_PREP(ARMV8_PMU_MDCR_HPMN, hpmn);
>> +	/* Prevent guest counters counting at EL2 */
>> +	mdcr |= ARMV8_PMU_MDCR_HPMD;
>> +
>> +	armv8pmu_mdcr_write(mdcr);
>> +}
>> +
>>   static int armv8pmu_has_overflowed(u64 pmovsr)
>>   {
>>   	return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK);
>> @@ -1069,6 +1093,9 @@ static void armv8pmu_reset(void *info)

>>   	bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);

>> +	if (cpu_pmu->partitioned)
>> +		armv8pmu_partition(cpu_pmu->hpmn);
>> +

> Kaboom, see below.

>>   	/* The counter and interrupt enable registers are unknown at reset. */
>>   	armv8pmu_disable_counter(mask);
>>   	armv8pmu_disable_intens(mask);
>> @@ -1205,6 +1232,7 @@ static void __armv8pmu_probe_pmu(void *info)
>>   {
>>   	struct armv8pmu_probe_info *probe = info;
>>   	struct arm_pmu *cpu_pmu = probe->pmu;
>> +	u8 pmcr_n;
>>   	u64 pmceid_raw[2];
>>   	u32 pmceid[2];
>>   	int pmuver;
>> @@ -1215,10 +1243,19 @@ static void __armv8pmu_probe_pmu(void *info)

>>   	cpu_pmu->pmuver = pmuver;
>>   	probe->present = true;
>> +	pmcr_n = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());

>>   	/* Read the nb of CNTx counters supported from PMNC */
>> -	bitmap_set(cpu_pmu->cntr_mask,
>> -		   0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
>> +	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
>> +
>> +	if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
>> +		cpu_pmu->hpmn = reserved_guest_counters;
>> +		cpu_pmu->partitioned = true;

> Isn't this going to completely explode on a kernel running at EL1?

Trying to access an EL2 register at EL1 can do that. I'll add the
appropriate hypercalls.

> Also, how does it work in an asymmetric configuration where some CPUs
> can satisfy the reservation, and some can't?

The CPUs that can't meet the reservation see their own value of PMCR.N
at or below the attempted reservation and so do not get partitioned.
Nothing changes for a CPU that can't meet the reservation.

>> +	} else {
>> +		reserved_guest_counters = 0;
>> +		cpu_pmu->hpmn = pmcr_n;
>> +		cpu_pmu->partitioned = false;
>> +	}

>>   	/* Add the CPU cycles counter */
>>   	set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask);
>> @@ -1516,3 +1553,5 @@ void arch_perf_update_userpage(struct perf_event  
>> *event,
>>   	userpg->cap_user_time_zero = 1;
>>   	userpg->cap_user_time_short = 1;
>>   }
>> +
>> +module_param(reserved_guest_counters, byte, 0);
>> diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
>> index 4b5b83677e3f..ad97aabed25a 100644
>> --- a/include/linux/perf/arm_pmu.h
>> +++ b/include/linux/perf/arm_pmu.h
>> @@ -101,6 +101,8 @@ struct arm_pmu {
>>   	void		(*reset)(void *);
>>   	int		(*map_event)(struct perf_event *event);
>>   	DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS);
>> +	u8		hpmn; /* MDCR_EL2.HPMN: counter partition pivot */
>> +	bool	        partitioned;
>>   	bool		secure_access; /* 32-bit ARM only */
>>   #define ARMV8_PMUV3_MAX_COMMON_EVENTS		0x40
>>   	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
>> diff --git a/include/linux/perf/arm_pmuv3.h  
>> b/include/linux/perf/arm_pmuv3.h
>> index d698efba28a2..d399e8c6f98e 100644
>> --- a/include/linux/perf/arm_pmuv3.h
>> +++ b/include/linux/perf/arm_pmuv3.h
>> @@ -223,6 +223,13 @@
>>   				 ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \
>>   				 ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)

>> +/*
>> + * Per-CPU MDCR: config reg
>> + */
>> +#define ARMV8_PMU_MDCR_HPMN		GENMASK(4, 0)
>> +#define ARMV8_PMU_MDCR_HPME		BIT(7)
>> +#define ARMV8_PMU_MDCR_HPMD		BIT(17)
>> +

> I'd rather we find a way to use the existing definitions from the
> sysreg file rather than add more duplication.

Easy enough

> I also don't see how the kernel knows not to access the low-numbered
> counters at this point. Maybe in a later patch, I'll keep reading.

> 	M.

> --
> Without deviation from the norm, progress is not possible.
Re: [RFC PATCH 1/4] perf: arm_pmuv3: Introduce module param to partition the PMU
Posted by Marc Zyngier 1 year ago
On Tue, 28 Jan 2025 22:08:27 +0000,
Colton Lewis <coltonlewis@google.com> wrote:
> 
> >> +	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
> >> +
> >> +	if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
> >> +		cpu_pmu->hpmn = reserved_guest_counters;
> >> +		cpu_pmu->partitioned = true;
> 
> > Isn't this going to completely explode on a kernel running at EL1?
> 
> Trying to access an EL2 register at EL1 can do that. I'll add the
> appropriate hypercalls.

But what about a guest that would get passed the magic parameter? I
think you want to prevent that too.

> 
> > Also, how does it work in an asymmetric configuration where some CPUs
> > can satisfy the reservation, and some can't?
> 
> The CPUs that can't meet the reservation see their own value of PMCR.N
> at or below the attempted reservation and so do not get partitioned.
> Nothing changes for a CPU that can't meet the reservation.

That's not what I meant. The question really is:

- do we want the reservation to be the number of counters reserved for
  the host

- or do we want it to be for the guest?

On symmetric systems, it doesn't matter. On broken big-little systems,
this changes everything (it has a direct impact on userspace's ability
to use the counters).

I think the design should reflect a decision on the above.
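
For instance, with made-up counter counts: big cores with PMCR.N = 6 and
little cores with PMCR.N = 3. Reserving 4 counters for the guest gives
HPMN = 4 on the big cores (leaving the host 2) and no partition at all
on the little cores, while reserving 4 for the host would instead mean
HPMN = N - 4, which the little cores cannot satisfy.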

	M.

-- 
Without deviation from the norm, progress is not possible.
Re: [RFC PATCH 1/4] perf: arm_pmuv3: Introduce module param to partition the PMU
Posted by Colton Lewis 1 year ago
Marc Zyngier <maz@kernel.org> writes:

> On Tue, 28 Jan 2025 22:08:27 +0000,
> Colton Lewis <coltonlewis@google.com> wrote:

>> >> +	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
>> >> +
>> >> +	if (reserved_guest_counters > 0 && reserved_guest_counters <  
>> pmcr_n) {
>> >> +		cpu_pmu->hpmn = reserved_guest_counters;
>> >> +		cpu_pmu->partitioned = true;

>> > Isn't this going to completely explode on a kernel running at EL1?

>> Trying to access an EL2 register at EL1 can do that. I'll add the
>> appropriate hypercalls.

> But what about a guest that would get passed the magic parameter? I
> think you want to prevent that too.

That doesn't matter because the ARM manual states that when HPMN is set,
reads of PMCR.N from EL1 return that value, and I've made sure in the
second patch that KVM does that. A guest therefore believes it is on a
system where reserved_guest_counters/HPMN == PMCR.N, so there is no
partition.
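
A sketch of that idea (the function and field shown here exist in
current KVM, but treating HPMN this way is an assumption about patch 2,
not a quote of it):

	/* Hypothetical: report HPMN as PMCR_EL0.N when partitioned */
	u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
	{
		u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0) & ~ARMV8_PMU_PMCR_N;

		/* kvm->arch.pmcr_n would hold HPMN for a partitioned PMU */
		return pmcr | FIELD_PREP(ARMV8_PMU_PMCR_N, vcpu->kvm->arch.pmcr_n);
	}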


>> > Also, how does it work in an asymmetric configuration where some CPUs
>> > can satisfy the reservation, and some can't?

>> The CPUs that can't meet the reservation see their own value of PMCR.N
>> at or below the attempted reservation and so do not get partitioned.
>> Nothing changes for a CPU that can't meet the reservation.

> That's not what I meant. The question really is:

> - do we want the reservation to be the number of counters reserved for
>    the host

> - or do we want it to be for the guest?

> On symmetric systems, it doesn't matter. On broken big-little systems,
> this changes everything (it has a direct impact on userspace's ability
> to use the counters).

> I think the design should reflect a decision on the above.

As currently written, and as reflected in the name reserved_guest_counters,
this series is making a reservation for the guest.

After talking with Oliver it probably makes more sense to make a
reservation for the host. This is closer to the semantics of the
underlying CPU feature.

In the limit case it's impossible to leave the host without
counters. All valid values for HPMN leave the host at least 1, but
should we make any guarantees beyond that?
Re: [RFC PATCH 1/4] perf: arm_pmuv3: Introduce module param to partition the PMU
Posted by Colton Lewis 1 year ago
Colton Lewis <coltonlewis@google.com> writes:

> Marc Zyngier <maz@kernel.org> writes:

>> On Tue, 28 Jan 2025 22:08:27 +0000,
>> Colton Lewis <coltonlewis@google.com> wrote:

>>> >> +	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
>>> >> +
>>> >> +	if (reserved_guest_counters > 0 && reserved_guest_counters <
>>> pmcr_n) {
>>> >> +		cpu_pmu->hpmn = reserved_guest_counters;
>>> >> +		cpu_pmu->partitioned = true;

>>> > Isn't this going to completely explode on a kernel running at EL1?

>>> Trying to access an EL2 register at EL1 can do that. I'll add the
>>> appropriate hypercalls.

Ooohh. Making this work at EL1 is much more complicated than adding the
hypercall to write MDCR because once HPMN takes effect, the upper range
of counters that the host will use is only writable at EL2. That means
using any register related to any counter in the upper range would also
require a hypercall. The only way around that would be to avoid using
this feature in the host entirely and only enable it when we load a
guest.
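
For example, a write to a host (upper-range) event counter would end up
looking something like this under nVHE (hypothetical sketch; the
__pmu_write_evcntr hypercall does not exist in this series):

	static void armv8pmu_write_evcntr(int idx, u64 value)
	{
		/* "partitioned" and "hpmn" stand in for the per-PMU state */
		if (partitioned && idx >= hpmn)
			kvm_call_hyp(__pmu_write_evcntr, idx, value);
		else
			write_pmevcntrn(idx, value);
	}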

I know we don't like feature discrepancies between VHE and nVHE mode,
but Oliver thinks it might be justified here to have PMU partitioning be
VHE-only.
Re: [RFC PATCH 1/4] perf: arm_pmuv3: Introduce module param to partition the PMU
Posted by Colton Lewis 1 year ago
Colton Lewis <coltonlewis@google.com> writes:

> Hey Marc, thanks for looking.

*for the review

> Marc Zyngier <maz@kernel.org> writes:

>> On Mon, 27 Jan 2025 22:20:27 +0000,
>> Colton Lewis <coltonlewis@google.com> wrote:

>>>    	/* Read the nb of CNTx counters supported from PMNC */
>>> -	bitmap_set(cpu_pmu->cntr_mask,
>>> -		   0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
>>> +	bitmap_set(cpu_pmu->cntr_mask, 0, pmcr_n);
>>> +
>>> +	if (reserved_guest_counters > 0 && reserved_guest_counters < pmcr_n) {
>>> +		cpu_pmu->hpmn = reserved_guest_counters;
>>> +		cpu_pmu->partitioned = true;

>> Isn't this going to completely explode on a kernel running at EL1?

> Trying to access an EL2 register at EL1 can do that. I'll add the
> appropriate hypercalls.

>> Also, how does it work in an asymmetric configuration where some CPUs
>> can satisfy the reservation, and some can't?

> The CPUs that can't meet the reservation see their own value of PMCR.N
> at or below the attempted reservation and so do not get partitioned.
> Nothing changes for a CPU that can't meet the reservation.

My mistake. That does not happen. I originally did these comparisons in
armv8pmu_reset, which runs for every CPU, but __armv8pmu_probe_pmu
doesn't.