For PMUv3, the register field MDCR_EL2.HPMN partitions the PMU
counters into two ranges where counters 0..HPMN-1 are accessible by
EL1 and, if allowed, EL0 while counters HPMN..N-1 are only accessible
by EL2.
Create module parameters partition_pmu and reserved_guest_counters to
reserve a number of counters for the guest. These numbers are set at
boot because the perf subsystem assumes the number of counters will
not change after the PMU is probed.
Introduce the function armv8pmu_partition() to modify the PMU driver's
cntr_mask of available counters to exclude the counters being reserved
for the guest and record reserved_guest_counters as the maximum
allowable value for HPMN.
Due to the difficulty this feature would create for the driver running
at EL1 on the host, partitioning is only allowed in VHE mode. Supporting
nVHE mode would require a hypercall for every counter access in the
driver because the counters reserved for the host by HPMN are only
accessible to EL2.
Signed-off-by: Colton Lewis <coltonlewis@google.com>
---
arch/arm/include/asm/arm_pmuv3.h | 10 ++++
arch/arm64/include/asm/arm_pmuv3.h | 5 ++
drivers/perf/arm_pmuv3.c | 95 +++++++++++++++++++++++++++++-
include/linux/perf/arm_pmu.h | 1 +
4 files changed, 109 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index 2ec0e5e83fc9..9dc43242538c 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -228,6 +228,11 @@ static inline bool kvm_set_pmuserenr(u64 val)
static inline void kvm_vcpu_pmu_resync_el0(void) {}
+static inline bool has_vhe(void)
+{
+ return false;
+}
+
/* PMU Version in DFR Register */
#define ARMV8_PMU_DFR_VER_NI 0
#define ARMV8_PMU_DFR_VER_V3P1 0x4
@@ -242,6 +247,11 @@ static inline bool pmuv3_implemented(int pmuver)
pmuver == ARMV8_PMU_DFR_VER_NI);
}
+static inline bool is_pmuv3p1(int pmuver)
+{
+ return pmuver >= ARMV8_PMU_DFR_VER_V3P1;
+}
+
static inline bool is_pmuv3p4(int pmuver)
{
return pmuver >= ARMV8_PMU_DFR_VER_V3P4;
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
index 32c003a7b810..e2057365ba73 100644
--- a/arch/arm64/include/asm/arm_pmuv3.h
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -173,6 +173,11 @@ static inline bool pmuv3_implemented(int pmuver)
pmuver == ID_AA64DFR0_EL1_PMUVer_NI);
}
+static inline bool is_pmuv3p1(int pmuver)
+{
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P1;
+}
+
static inline bool is_pmuv3p4(int pmuver)
{
return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4;
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 3db9f4ed17e8..26230cd4175c 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -35,6 +35,17 @@
#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC
#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED
+static bool partition_pmu __read_mostly;
+static u8 reserved_guest_counters __read_mostly;
+
+module_param(partition_pmu, bool, 0);
+MODULE_PARM_DESC(partition_pmu,
+ "Partition the PMU into host and guest VM counters [y/n]");
+
+module_param(reserved_guest_counters, byte, 0);
+MODULE_PARM_DESC(reserved_guest_counters,
+ "How many counters to reserve for guest VMs [0-$NR_COUNTERS]");
+
/*
* ARMv8 Architectural defined events, not all of these may
* be supported on any given implementation. Unsupported events will
@@ -500,6 +511,11 @@ static void armv8pmu_pmcr_write(u64 val)
write_pmcr(val);
}
+static u64 armv8pmu_pmcr_n_read(void)
+{
+ return FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
+}
+
static int armv8pmu_has_overflowed(u64 pmovsr)
{
return !!(pmovsr & ARMV8_PMU_OVERFLOWED_MASK);
@@ -1195,6 +1211,74 @@ struct armv8pmu_probe_info {
bool present;
};
+/**
+ * armv8pmu_reservation_is_valid() - Determine if reservation is allowed
+ * @guest_counters: Number of counters to reserve for the guest
+ *
+ * Determine if the number of guest counters in the argument is
+ * allowed. It is allowed if it does not exceed PMCR.N, so that it
+ * is a valid value for register field MDCR_EL2.HPMN.
+ *
+ * Return: True if reservation allowed, false otherwise
+ */
+static bool armv8pmu_reservation_is_valid(u8 guest_counters)
+{
+ return guest_counters <= armv8pmu_pmcr_n_read();
+}
+
+/**
+ * armv8pmu_partition_supported() - Determine if partitioning is possible
+ *
+ * Only possible in VHE mode (PMUv3 is assumed since this is the PMUv3
+ * driver): without VHE the host driver at EL1 cannot reach EL2's counters.
+ *
+ * Return: True if partitioning is possible, false otherwise
+ */
+static bool armv8pmu_partition_supported(void)
+{
+ return has_vhe();
+}
+
+/**
+ * armv8pmu_partition() - Partition the PMU
+ * @pmu: Pointer to pmu being partitioned
+ * @guest_counters: Number of counters to reserve for the guest
+ *
+ * If @guest_counters is a valid reservation, record it as the maximum
+ * HPMN value in @pmu->hpmn_max and restrict the host counter mask to
+ * general-purpose counters HPMN..N-1. The cycle counter and, if
+ * present, the instruction counter are also removed from the mask.
+ *
+ * Passing 0 for @guest_counters leaves all event counters with the host.
+ *
+ * Return: 0 on success, -EINVAL if the reservation is invalid
+ */
+static int armv8pmu_partition(struct arm_pmu *pmu, u8 guest_counters)
+{
+ u8 nr_counters;
+ u8 hpmn;
+
+ if (!armv8pmu_reservation_is_valid(guest_counters))
+ return -EINVAL;
+
+ nr_counters = armv8pmu_pmcr_n_read();
+ hpmn = guest_counters;
+
+ pmu->hpmn_max = hpmn;
+
+ /* Host keeps general-purpose counters HPMN..N-1 only */
+ bitmap_clear(pmu->cntr_mask, 0, hpmn);
+ bitmap_set(pmu->cntr_mask, hpmn, nr_counters - hpmn);
+ clear_bit(ARMV8_PMU_CYCLE_IDX, pmu->cntr_mask);
+
+ if (pmuv3_has_icntr())
+ clear_bit(ARMV8_PMU_INSTR_IDX, pmu->cntr_mask);
+
+ pr_info("Partitioned PMU with HPMN %u\n", hpmn);
+
+ return 0;
+}
+
static void __armv8pmu_probe_pmu(void *info)
{
struct armv8pmu_probe_info *probe = info;
@@ -1209,10 +1293,10 @@ static void __armv8pmu_probe_pmu(void *info)
cpu_pmu->pmuver = pmuver;
probe->present = true;
+ cpu_pmu->hpmn_max = -1;
/* Read the nb of CNTx counters supported from PMNC */
- bitmap_set(cpu_pmu->cntr_mask,
- 0, FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read()));
+ bitmap_set(cpu_pmu->cntr_mask, 0, armv8pmu_pmcr_n_read());
/* Add the CPU cycles counter */
set_bit(ARMV8_PMU_CYCLE_IDX, cpu_pmu->cntr_mask);
@@ -1221,6 +1305,13 @@ static void __armv8pmu_probe_pmu(void *info)
if (pmuv3_has_icntr())
set_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask);
+ if (partition_pmu) {
+ if (armv8pmu_partition_supported())
+ WARN_ON(armv8pmu_partition(cpu_pmu, reserved_guest_counters));
+ else
+ pr_err("PMU partition is not supported");
+ }
+
pmceid[0] = pmceid_raw[0] = read_pmceid0();
pmceid[1] = pmceid_raw[1] = read_pmceid1();
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 1de206b09616..95f2b800e63d 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -130,6 +130,7 @@ struct arm_pmu {
/* Only to be used by ACPI probing code */
unsigned long acpi_cpuid;
+ u8 hpmn_max; /* MDCR_EL2.HPMN: counter partition pivot */
};
#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
--
2.50.0.714.g196bf9f422-goog
On Fri, Jun 20, 2025 at 10:13:07PM +0000, Colton Lewis wrote: > For PMUv3, the register field MDCR_EL2.HPMN partitiones the PMU > counters into two ranges where counters 0..HPMN-1 are accessible by > EL1 and, if allowed, EL0 while counters HPMN..N are only accessible by > EL2. > > Create module parameters partition_pmu and reserved_guest_counters to > reserve a number of counters for the guest. These numbers are set at > boot because the perf subsystem assumes the number of counters will > not change after the PMU is probed. > > Introduce the function armv8pmu_partition() to modify the PMU driver's > cntr_mask of available counters to exclude the counters being reserved > for the guest and record reserved_guest_counters as the maximum > allowable value for HPMN. > > Due to the difficulty this feature would create for the driver running > at EL1 on the host, partitioning is only allowed in VHE mode. Working > on nVHE mode would require a hypercall for every counter access in the > driver because the counters reserved for the host by HPMN are only > accessible to EL2. > > Signed-off-by: Colton Lewis <coltonlewis@google.com> > --- > arch/arm/include/asm/arm_pmuv3.h | 10 ++++ > arch/arm64/include/asm/arm_pmuv3.h | 5 ++ > drivers/perf/arm_pmuv3.c | 95 +++++++++++++++++++++++++++++- > include/linux/perf/arm_pmu.h | 1 + > 4 files changed, 109 insertions(+), 2 deletions(-) > > diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h > index 2ec0e5e83fc9..9dc43242538c 100644 > --- a/arch/arm/include/asm/arm_pmuv3.h > +++ b/arch/arm/include/asm/arm_pmuv3.h > @@ -228,6 +228,11 @@ static inline bool kvm_set_pmuserenr(u64 val) > > static inline void kvm_vcpu_pmu_resync_el0(void) {} > > +static inline bool has_vhe(void) > +{ > + return false; > +} > + This has nothing to do with PMUv3, I'm a bit surprised to see you're touching 32-bit ARM. Can you just gate the whole partitioning thing on arm64? 
> +static bool partition_pmu __read_mostly; > +static u8 reserved_guest_counters __read_mostly; > + > +module_param(partition_pmu, bool, 0); > +MODULE_PARM_DESC(partition_pmu, > + "Partition the PMU into host and guest VM counters [y/n]"); > + > +module_param(reserved_guest_counters, byte, 0); > +MODULE_PARM_DESC(reserved_guest_counters, > + "How many counters to reserve for guest VMs [0-$NR_COUNTERS]"); > + This is confusing and not what we discussed offline. Please use a single parameter that describes the number of counters used by the *host*. This affects the *host* PMU driver, KVM can discover (and use) the leftovers. If the single module parameter goes unspecified the user did not ask for PMU partitioning. > +/** > + * armv8pmu_reservation_is_valid() - Determine if reservation is allowed > + * @guest_counters: Number of host counters to reserve > + * > + * Determine if the number of host counters in the argument is > + * allowed. It is allowed if it will produce a valid value for > + * register field MDCR_EL2.HPMN. 
> + * > + * Return: True if reservation allowed, false otherwise > + */ > +static bool armv8pmu_reservation_is_valid(u8 guest_counters) > +{ > + return guest_counters <= armv8pmu_pmcr_n_read(); > +} > + > +/** > + * armv8pmu_partition_supported() - Determine if partitioning is possible > + * > + * Partitioning is only supported in VHE mode (with PMUv3, assumed > + * since we are in the PMUv3 driver) > + * > + * Return: True if partitioning is possible, false otherwise > + */ > +static bool armv8pmu_partition_supported(void) > +{ > + return has_vhe(); > +} > + > +/** > + * armv8pmu_partition() - Partition the PMU > + * @pmu: Pointer to pmu being partitioned > + * @guest_counters: Number of host counters to reserve > + * > + * Partition the given PMU by taking a number of host counters to > + * reserve and, if it is a valid reservation, recording the > + * corresponding HPMN value in the hpmn field of the PMU and clearing > + * the guest-reserved counters from the counter mask. > + * > + * Passing 0 for @guest_counters has the effect of disabling partitioning. > + * > + * Return: 0 on success, -ERROR otherwise > + */ > +static int armv8pmu_partition(struct arm_pmu *pmu, u8 guest_counters) > +{ > + u8 nr_counters; > + u8 hpmn; > + > + if (!armv8pmu_reservation_is_valid(guest_counters)) > + return -EINVAL; > + > + nr_counters = armv8pmu_pmcr_n_read(); > + hpmn = guest_counters; > + > + pmu->hpmn_max = hpmn; I'm not sure the host driver needs this for anything, KVM just needs to know what's potentially in use by the host. > + /* Inform host driver of available counters */ ... said the driver to itself :) Thanks, Oliver
Oliver Upton <oliver.upton@linux.dev> writes: > On Fri, Jun 20, 2025 at 10:13:07PM +0000, Colton Lewis wrote: >> For PMUv3, the register field MDCR_EL2.HPMN partitiones the PMU >> counters into two ranges where counters 0..HPMN-1 are accessible by >> EL1 and, if allowed, EL0 while counters HPMN..N are only accessible by >> EL2. >> Create module parameters partition_pmu and reserved_guest_counters to >> reserve a number of counters for the guest. These numbers are set at >> boot because the perf subsystem assumes the number of counters will >> not change after the PMU is probed. >> Introduce the function armv8pmu_partition() to modify the PMU driver's >> cntr_mask of available counters to exclude the counters being reserved >> for the guest and record reserved_guest_counters as the maximum >> allowable value for HPMN. >> Due to the difficulty this feature would create for the driver running >> at EL1 on the host, partitioning is only allowed in VHE mode. Working >> on nVHE mode would require a hypercall for every counter access in the >> driver because the counters reserved for the host by HPMN are only >> accessible to EL2. >> Signed-off-by: Colton Lewis <coltonlewis@google.com> >> --- >> arch/arm/include/asm/arm_pmuv3.h | 10 ++++ >> arch/arm64/include/asm/arm_pmuv3.h | 5 ++ >> drivers/perf/arm_pmuv3.c | 95 +++++++++++++++++++++++++++++- >> include/linux/perf/arm_pmu.h | 1 + >> 4 files changed, 109 insertions(+), 2 deletions(-) >> diff --git a/arch/arm/include/asm/arm_pmuv3.h >> b/arch/arm/include/asm/arm_pmuv3.h >> index 2ec0e5e83fc9..9dc43242538c 100644 >> --- a/arch/arm/include/asm/arm_pmuv3.h >> +++ b/arch/arm/include/asm/arm_pmuv3.h >> @@ -228,6 +228,11 @@ static inline bool kvm_set_pmuserenr(u64 val) >> static inline void kvm_vcpu_pmu_resync_el0(void) {} >> +static inline bool has_vhe(void) >> +{ >> + return false; >> +} >> + > This has nothing to do with PMUv3, I'm a bit surprised to see you're > touching 32-bit ARM. 
Can you just gate the whole partitioning thing on > arm64? The PMUv3 driver also has to compile on 32-bit ARM. My first series had the partitioning code in arch/arm64 but you asked me to move it to the PMUv3 driver. How are you suggesting I square those two requirements? >> +static bool partition_pmu __read_mostly; >> +static u8 reserved_guest_counters __read_mostly; >> + >> +module_param(partition_pmu, bool, 0); >> +MODULE_PARM_DESC(partition_pmu, >> + "Partition the PMU into host and guest VM counters [y/n]"); >> + >> +module_param(reserved_guest_counters, byte, 0); >> +MODULE_PARM_DESC(reserved_guest_counters, >> + "How many counters to reserve for guest VMs [0-$NR_COUNTERS]"); >> + > This is confusing and not what we discussed offline. > Please use a single parameter that describes the number of counters used > by the *host*. This affects the *host* PMU driver, KVM can discover (and > use) the leftovers. > If the single module parameter goes unspecified the user did not ask for > PMU partitioning. I understand what we discussed offline, but I had a dilemma. If we do a single module parameter for number of counters used by the host, then it defaults to 0 if unset and there is no way to distinguish between no partitioning and a request for partitioning reserving 0 counters to the host which I also thought you requested. Would you be happy leaving no way to specify that? In any case, I think the usage is more self explainatory if partitition=[y/n] is a separate bit. The other parameter for guest reservation is then based on a consideration of what an unset parameter should mean and I decided it's a more sane default if partition=y [other-param]=0/unset gives 0 counters to the guest. It does affect the host, but by default the host owns everything. The only people who will be tweaking these parameters are going to be concerned with how many counters the guest gets and I think the parameters should reflect that intent. 
>> +/** >> + * armv8pmu_reservation_is_valid() - Determine if reservation is allowed >> + * @guest_counters: Number of host counters to reserve >> + * >> + * Determine if the number of host counters in the argument is >> + * allowed. It is allowed if it will produce a valid value for >> + * register field MDCR_EL2.HPMN. >> + * >> + * Return: True if reservation allowed, false otherwise >> + */ >> +static bool armv8pmu_reservation_is_valid(u8 guest_counters) >> +{ >> + return guest_counters <= armv8pmu_pmcr_n_read(); >> +} >> + >> +/** >> + * armv8pmu_partition_supported() - Determine if partitioning is >> possible >> + * >> + * Partitioning is only supported in VHE mode (with PMUv3, assumed >> + * since we are in the PMUv3 driver) >> + * >> + * Return: True if partitioning is possible, false otherwise >> + */ >> +static bool armv8pmu_partition_supported(void) >> +{ >> + return has_vhe(); >> +} >> + >> +/** >> + * armv8pmu_partition() - Partition the PMU >> + * @pmu: Pointer to pmu being partitioned >> + * @guest_counters: Number of host counters to reserve >> + * >> + * Partition the given PMU by taking a number of host counters to >> + * reserve and, if it is a valid reservation, recording the >> + * corresponding HPMN value in the hpmn field of the PMU and clearing >> + * the guest-reserved counters from the counter mask. >> + * >> + * Passing 0 for @guest_counters has the effect of disabling >> partitioning. >> + * >> + * Return: 0 on success, -ERROR otherwise >> + */ >> +static int armv8pmu_partition(struct arm_pmu *pmu, u8 guest_counters) >> +{ >> + u8 nr_counters; >> + u8 hpmn; >> + >> + if (!armv8pmu_reservation_is_valid(guest_counters)) >> + return -EINVAL; >> + >> + nr_counters = armv8pmu_pmcr_n_read(); >> + hpmn = guest_counters; >> + >> + pmu->hpmn_max = hpmn; > I'm not sure the host driver needs this for anything, KVM just needs to > know what's potentially in use by the host. >> + /* Inform host driver of available counters */ > ... 
said the driver to itself :) I can delete that comment now :)
On Mon, Jun 23, 2025 at 06:26:42PM +0000, Colton Lewis wrote: > Oliver Upton <oliver.upton@linux.dev> writes: > > > On Fri, Jun 20, 2025 at 10:13:07PM +0000, Colton Lewis wrote: > > > For PMUv3, the register field MDCR_EL2.HPMN partitiones the PMU > > > counters into two ranges where counters 0..HPMN-1 are accessible by > > > EL1 and, if allowed, EL0 while counters HPMN..N are only accessible by > > > EL2. > > > > Create module parameters partition_pmu and reserved_guest_counters to > > > reserve a number of counters for the guest. These numbers are set at > > > boot because the perf subsystem assumes the number of counters will > > > not change after the PMU is probed. > > > > Introduce the function armv8pmu_partition() to modify the PMU driver's > > > cntr_mask of available counters to exclude the counters being reserved > > > for the guest and record reserved_guest_counters as the maximum > > > allowable value for HPMN. > > > > Due to the difficulty this feature would create for the driver running > > > at EL1 on the host, partitioning is only allowed in VHE mode. Working > > > on nVHE mode would require a hypercall for every counter access in the > > > driver because the counters reserved for the host by HPMN are only > > > accessible to EL2. 
> > > > Signed-off-by: Colton Lewis <coltonlewis@google.com> > > > --- > > > arch/arm/include/asm/arm_pmuv3.h | 10 ++++ > > > arch/arm64/include/asm/arm_pmuv3.h | 5 ++ > > > drivers/perf/arm_pmuv3.c | 95 +++++++++++++++++++++++++++++- > > > include/linux/perf/arm_pmu.h | 1 + > > > 4 files changed, 109 insertions(+), 2 deletions(-) > > > > diff --git a/arch/arm/include/asm/arm_pmuv3.h > > > b/arch/arm/include/asm/arm_pmuv3.h > > > index 2ec0e5e83fc9..9dc43242538c 100644 > > > --- a/arch/arm/include/asm/arm_pmuv3.h > > > +++ b/arch/arm/include/asm/arm_pmuv3.h > > > @@ -228,6 +228,11 @@ static inline bool kvm_set_pmuserenr(u64 val) > > > > static inline void kvm_vcpu_pmu_resync_el0(void) {} > > > > +static inline bool has_vhe(void) > > > +{ > > > + return false; > > > +} > > > + > > > This has nothing to do with PMUv3, I'm a bit surprised to see you're > > touching 32-bit ARM. Can you just gate the whole partitioning thing on > > arm64? > > The PMUv3 driver also has to compile on 32-bit ARM. Quite aware. > My first series had the partitioning code in arch/arm64 but you asked me > to move it to the PMUv3 driver. > > How are you suggesting I square those two requirements? You should try to structure your predicates in such a way that the partitioning stuff all resolves to false for 32 bit arm, generally. That way we can avoid stubbing out silly things like has_vhe() which doesn't make sense in the context of 32 bit. > > > +static bool partition_pmu __read_mostly; > > > +static u8 reserved_guest_counters __read_mostly; > > > + > > > +module_param(partition_pmu, bool, 0); > > > +MODULE_PARM_DESC(partition_pmu, > > > + "Partition the PMU into host and guest VM counters [y/n]"); > > > + > > > +module_param(reserved_guest_counters, byte, 0); > > > +MODULE_PARM_DESC(reserved_guest_counters, > > > + "How many counters to reserve for guest VMs [0-$NR_COUNTERS]"); > > > + > > > This is confusing and not what we discussed offline. 
> > > Please use a single parameter that describes the number of counters used > > by the *host*. This affects the *host* PMU driver, KVM can discover (and > > use) the leftovers. > > > If the single module parameter goes unspecified the user did not ask for > > PMU partitioning. > > I understand what we discussed offline, but I had a dilemma. > > If we do a single module parameter for number of counters used by the > host, then it defaults to 0 if unset and there is no way to distinguish > between no partitioning and a request for partitioning reserving 0 > counters to the host which I also thought you requested. Would you be > happy leaving no way to specify that? You can make the command line use a signed integer for storage and a reset value of -1. -1 would imply default behavior (no partitioning) and a non-negative value would imply partitioning. > In any case, I think the usage is more self explainatory if > partitition=[y/n] is a separate bit. What would be the user's intent of "partition_pmu=n reserved_guest_counters=$X"? Thanks, Oliver
Oliver Upton <oliver.upton@linux.dev> writes: > On Mon, Jun 23, 2025 at 06:26:42PM +0000, Colton Lewis wrote: >> Oliver Upton <oliver.upton@linux.dev> writes: >> > On Fri, Jun 20, 2025 at 10:13:07PM +0000, Colton Lewis wrote: >> > > For PMUv3, the register field MDCR_EL2.HPMN partitiones the PMU >> > > counters into two ranges where counters 0..HPMN-1 are accessible by >> > > EL1 and, if allowed, EL0 while counters HPMN..N are only accessible >> by >> > > EL2. >> > > Create module parameters partition_pmu and reserved_guest_counters to >> > > reserve a number of counters for the guest. These numbers are set at >> > > boot because the perf subsystem assumes the number of counters will >> > > not change after the PMU is probed. >> > > Introduce the function armv8pmu_partition() to modify the PMU >> driver's >> > > cntr_mask of available counters to exclude the counters being >> reserved >> > > for the guest and record reserved_guest_counters as the maximum >> > > allowable value for HPMN. >> > > Due to the difficulty this feature would create for the driver >> running >> > > at EL1 on the host, partitioning is only allowed in VHE mode. Working >> > > on nVHE mode would require a hypercall for every counter access in >> the >> > > driver because the counters reserved for the host by HPMN are only >> > > accessible to EL2. 
>> > > Signed-off-by: Colton Lewis <coltonlewis@google.com> >> > > --- >> > > arch/arm/include/asm/arm_pmuv3.h | 10 ++++ >> > > arch/arm64/include/asm/arm_pmuv3.h | 5 ++ >> > > drivers/perf/arm_pmuv3.c | 95 >> +++++++++++++++++++++++++++++- >> > > include/linux/perf/arm_pmu.h | 1 + >> > > 4 files changed, 109 insertions(+), 2 deletions(-) >> > > diff --git a/arch/arm/include/asm/arm_pmuv3.h >> > > b/arch/arm/include/asm/arm_pmuv3.h >> > > index 2ec0e5e83fc9..9dc43242538c 100644 >> > > --- a/arch/arm/include/asm/arm_pmuv3.h >> > > +++ b/arch/arm/include/asm/arm_pmuv3.h >> > > @@ -228,6 +228,11 @@ static inline bool kvm_set_pmuserenr(u64 val) >> > > static inline void kvm_vcpu_pmu_resync_el0(void) {} >> > > +static inline bool has_vhe(void) >> > > +{ >> > > + return false; >> > > +} >> > > + >> > This has nothing to do with PMUv3, I'm a bit surprised to see you're >> > touching 32-bit ARM. Can you just gate the whole partitioning thing on >> > arm64? >> The PMUv3 driver also has to compile on 32-bit ARM. > Quite aware. >> My first series had the partitioning code in arch/arm64 but you asked me >> to move it to the PMUv3 driver. >> How are you suggesting I square those two requirements? > You should try to structure your predicates in such a way that the > partitioning stuff all resolves to false for 32 bit arm, generally. That > way we can avoid stubbing out silly things like has_vhe() which doesn't > make sense in the context of 32 bit. Okay. I will do that. When I was reworking it I thought it looked weird to have the predicates live in a different location than the main partitioning function. 
>> > > +static bool partition_pmu __read_mostly; >> > > +static u8 reserved_guest_counters __read_mostly; >> > > + >> > > +module_param(partition_pmu, bool, 0); >> > > +MODULE_PARM_DESC(partition_pmu, >> > > + "Partition the PMU into host and guest VM counters [y/n]"); >> > > + >> > > +module_param(reserved_guest_counters, byte, 0); >> > > +MODULE_PARM_DESC(reserved_guest_counters, >> > > + "How many counters to reserve for guest VMs [0-$NR_COUNTERS]"); >> > > + >> > This is confusing and not what we discussed offline. >> > Please use a single parameter that describes the number of counters >> used >> > by the *host*. This affects the *host* PMU driver, KVM can discover >> (and >> > use) the leftovers. >> > If the single module parameter goes unspecified the user did not ask >> for >> > PMU partitioning. >> I understand what we discussed offline, but I had a dilemma. >> If we do a single module parameter for number of counters used by the >> host, then it defaults to 0 if unset and there is no way to distinguish >> between no partitioning and a request for partitioning reserving 0 >> counters to the host which I also thought you requested. Would you be >> happy leaving no way to specify that? > You can make the command line use a signed integer for storage and a > reset value of -1. > -1 would imply default behavior (no partitioning) and a non-negative > value would imply partitioning. Good idea. I thought of that solution myself for the first time after I logged off yesterday. Slightly embarrassed I didn't see it sooner :( >> In any case, I think the usage is more self explainatory if >> partitition=[y/n] is a separate bit. > What would be the user's intent of "partition_pmu=n > reserved_guest_counters=$X"? That doesn't make sense, which is a decent argument for using just one parameter. I'm now fine with going back to just reserved_host_counters.
© 2016 - 2025 Red Hat, Inc.