From: Like Xu <likexu@tencent.com>
The AMD PerfMonV2 specification allows for a maximum of 16 GP counters,
which the current KVM clearly cannot support without code changes.
A local macro (named like INTEL_PMC_MAX_GENERIC) is introduced to
take back control of this virt capability, which also makes it easier to
statically partition all available counters between hosts and guests.
Signed-off-by: Like Xu <likexu@tencent.com>
---
arch/x86/kvm/pmu.h | 2 ++
arch/x86/kvm/svm/pmu.c | 7 ++++---
arch/x86/kvm/x86.c | 2 ++
3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 847e7112a5d3..e3a3813b6a38 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -18,6 +18,8 @@
#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
+#define KVM_AMD_PMC_MAX_GENERIC AMD64_NUM_COUNTERS_CORE
+
struct kvm_event_hw_type_mapping {
u8 eventsel;
u8 unit_mask;
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 2ec420b85d6a..f99f2c869664 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
+ BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > KVM_AMD_PMC_MAX_GENERIC);
+ BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
- for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
+ for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) {
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
@@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
+ for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
struct kvm_pmc *pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 43a6a7efc6ec..b9738efd8425 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1444,12 +1444,14 @@ static const u32 msrs_to_save_all[] = {
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
+ /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
+
MSR_IA32_XFD, MSR_IA32_XFD_ERR,
};
--
2.37.3
On Mon, Sep 5, 2022 at 5:45 AM Like Xu <like.xu.linux@gmail.com> wrote:
>
> From: Like Xu <likexu@tencent.com>
>
> The AMD PerfMonV2 specification allows for a maximum of 16 GP counters,
> which is clearly not supported with zero code effort in the current KVM.
>
> A local macro (named like INTEL_PMC_MAX_GENERIC) is introduced to
> take back control of this virt capability, which also makes it easier to
> statically partition all available counters between hosts and guests.
>
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---
> arch/x86/kvm/pmu.h | 2 ++
> arch/x86/kvm/svm/pmu.c | 7 ++++---
> arch/x86/kvm/x86.c | 2 ++
> 3 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 847e7112a5d3..e3a3813b6a38 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -18,6 +18,8 @@
> #define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
> #define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
>
> +#define KVM_AMD_PMC_MAX_GENERIC AMD64_NUM_COUNTERS_CORE
> +
> struct kvm_event_hw_type_mapping {
> u8 eventsel;
> u8 unit_mask;
> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> index 2ec420b85d6a..f99f2c869664 100644
> --- a/arch/x86/kvm/svm/pmu.c
> +++ b/arch/x86/kvm/svm/pmu.c
> @@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> int i;
>
> - BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
> + BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > KVM_AMD_PMC_MAX_GENERIC);
> + BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
>
> - for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
> + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) {
> pmu->gp_counters[i].type = KVM_PMC_GP;
> pmu->gp_counters[i].vcpu = vcpu;
> pmu->gp_counters[i].idx = i;
> @@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> int i;
>
> - for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
> + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
> struct kvm_pmc *pmc = &pmu->gp_counters[i];
>
> pmc_stop_counter(pmc);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 43a6a7efc6ec..b9738efd8425 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1444,12 +1444,14 @@ static const u32 msrs_to_save_all[] = {
> MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
IIRC, the effective maximum on the Intel side is 18, despite what
INTEL_PMC_MAX_GENERIC says, due to collisions with other existing MSR
indices. That's why the Intel list stops here. Should we introduce a
KVM_INTEL_PMC_MAX_GENERIC as well?
> MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
>
> + /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
Perhaps the comment above should be moved down two lines, since the
next two lines deal with the legacy counters.
> MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
> MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
> MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
> MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
> MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
> MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
At some point, we may want to consider populating the PMU MSR list
dynamically, rather than statically enumerating all of them (for both
AMD and Intel).
Reviewed-by: Jim Mattson <jmattson@google.com>
On 6/9/2022 1:26 am, Jim Mattson wrote:
> On Mon, Sep 5, 2022 at 5:45 AM Like Xu <like.xu.linux@gmail.com> wrote:
>>
>> From: Like Xu <likexu@tencent.com>
>>
>> The AMD PerfMonV2 specification allows for a maximum of 16 GP counters,
>> which is clearly not supported with zero code effort in the current KVM.
>>
>> A local macro (named like INTEL_PMC_MAX_GENERIC) is introduced to
>> take back control of this virt capability, which also makes it easier to
>> statically partition all available counters between hosts and guests.
>>
>> Signed-off-by: Like Xu <likexu@tencent.com>
>> ---
>> arch/x86/kvm/pmu.h | 2 ++
>> arch/x86/kvm/svm/pmu.c | 7 ++++---
>> arch/x86/kvm/x86.c | 2 ++
>> 3 files changed, 8 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
>> index 847e7112a5d3..e3a3813b6a38 100644
>> --- a/arch/x86/kvm/pmu.h
>> +++ b/arch/x86/kvm/pmu.h
>> @@ -18,6 +18,8 @@
>> #define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
>> #define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
>>
>> +#define KVM_AMD_PMC_MAX_GENERIC AMD64_NUM_COUNTERS_CORE
>> +
>> struct kvm_event_hw_type_mapping {
>> u8 eventsel;
>> u8 unit_mask;
>> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
>> index 2ec420b85d6a..f99f2c869664 100644
>> --- a/arch/x86/kvm/svm/pmu.c
>> +++ b/arch/x86/kvm/svm/pmu.c
>> @@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> int i;
>>
>> - BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
>> + BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > KVM_AMD_PMC_MAX_GENERIC);
>> + BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
>>
>> - for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
>> + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) {
>> pmu->gp_counters[i].type = KVM_PMC_GP;
>> pmu->gp_counters[i].vcpu = vcpu;
>> pmu->gp_counters[i].idx = i;
>> @@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> int i;
>>
>> - for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
>> + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
>> struct kvm_pmc *pmc = &pmu->gp_counters[i];
>>
>> pmc_stop_counter(pmc);
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 43a6a7efc6ec..b9738efd8425 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -1444,12 +1444,14 @@ static const u32 msrs_to_save_all[] = {
>> MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
>
> IIRC, the effective maximum on the Intel side is 18, despite what
> INTEL_PMC_MAX_GENERIC says, due to collisions with other existing MSR
Emm, check https://lore.kernel.org/kvm/20220906081604.24035-1-likexu@tencent.com/
> indices. That's why the Intel list stops here. Should we introduce a
> KVM_INTEL_PMC_MAX_GENERIC as well?
Yes, this suggestion will be applied in the next version.
>
>> MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
>>
>> + /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
>
> Perhaps the comment above should be moved down two lines, since the
> next two lines deal with the legacy counters.
Applied, thanks.
>
>> MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
>> MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
>> MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
>> MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
>> MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
>> MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
>
> At some point, we may want to consider populating the PMU MSR list
> dynamically, rather than statically enumerating all of them (for both
> AMD and Intel).
The uncertainty of the msrs_to_save_all[] contents may cause trouble for legacy userspace.
As a first step, I have draft patches that rewrite the host-initiated PMU MSR accesses.
>
> Reviewed-by: Jim Mattson <jmattson@google.com>
© 2016 - 2026 Red Hat, Inc.