From: Jinrong Liang <cloudliang@tencent.com>
Define the x86 PMU performance events and masks in a new pmu.h header so
that they are easy to reference, minimizing potential errors in code that
handles these values.
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Jinrong Liang <cloudliang@tencent.com>
---
.../selftests/kvm/include/x86_64/pmu.h | 124 ++++++++++++++++++
1 file changed, 124 insertions(+)
create mode 100644 tools/testing/selftests/kvm/include/x86_64/pmu.h
diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86_64/pmu.h
new file mode 100644
index 000000000000..eb60b2065fac
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/pmu.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/pmu.h
+ *
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#ifndef SELFTEST_KVM_PMU_H
+#define SELFTEST_KVM_PMU_H
+
+#include "processor.h"
+
+#define GP_COUNTER_NR_OFS_BIT 8
+#define EVENT_LENGTH_OFS_BIT 24
+#define INTEL_PMC_IDX_FIXED 32
+
+#define AMD64_NR_COUNTERS 4
+#define AMD64_NR_COUNTERS_CORE 6
+
+#define PMU_CAP_FW_WRITES BIT_ULL(13)
+#define RDPMC_FIXED_BASE BIT_ULL(30)
+
+#define ARCH_PERFMON_EVENTSEL_EVENT GENMASK_ULL(7, 0)
+#define ARCH_PERFMON_EVENTSEL_UMASK GENMASK_ULL(15, 8)
+#define ARCH_PERFMON_EVENTSEL_USR BIT_ULL(16)
+#define ARCH_PERFMON_EVENTSEL_OS BIT_ULL(17)
+
+#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18)
+#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19)
+#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20)
+#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22)
+#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23)
+#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24)
+
+#define PMU_VERSION_MASK GENMASK_ULL(7, 0)
+#define EVENT_LENGTH_MASK GENMASK_ULL(31, EVENT_LENGTH_OFS_BIT)
+#define GP_COUNTER_NR_MASK GENMASK_ULL(15, GP_COUNTER_NR_OFS_BIT)
+#define FIXED_COUNTER_NR_MASK GENMASK_ULL(4, 0)
+
+/* Definitions for Architectural Performance Events */
+#define ARCH_EVENT(select, umask) (((select) & 0xff) | ((umask) & 0xff) << 8)
+
+enum intel_pmu_architectural_events {
+	/*
+	 * The order of the architectural events matters as support for each
+	 * event is enumerated via CPUID using the index of the event.
+	 */
+	INTEL_ARCH_CPU_CYCLES,
+	INTEL_ARCH_INSTRUCTIONS_RETIRED,
+	INTEL_ARCH_REFERENCE_CYCLES,
+	INTEL_ARCH_LLC_REFERENCES,
+	INTEL_ARCH_LLC_MISSES,
+	INTEL_ARCH_BRANCHES_RETIRED,
+	INTEL_ARCH_BRANCHES_MISPREDICTED,
+
+	NR_REAL_INTEL_ARCH_EVENTS,
+
+	/*
+	 * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
+	 * TSC reference cycles. The architectural reference cycles event may
+	 * or may not actually use the TSC as the reference, e.g. might use the
+	 * core crystal clock or the bus clock (yeah, "architectural").
+	 */
+	PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
+	NR_INTEL_ARCH_EVENTS,
+};
+
+static const uint64_t intel_arch_events[] = {
+	[INTEL_ARCH_CPU_CYCLES] = ARCH_EVENT(0x3c, 0x0),
+	[INTEL_ARCH_INSTRUCTIONS_RETIRED] = ARCH_EVENT(0xc0, 0x0),
+	[INTEL_ARCH_REFERENCE_CYCLES] = ARCH_EVENT(0x3c, 0x1),
+	[INTEL_ARCH_LLC_REFERENCES] = ARCH_EVENT(0x2e, 0x4f),
+	[INTEL_ARCH_LLC_MISSES] = ARCH_EVENT(0x2e, 0x41),
+	[INTEL_ARCH_BRANCHES_RETIRED] = ARCH_EVENT(0xc4, 0x0),
+	[INTEL_ARCH_BRANCHES_MISPREDICTED] = ARCH_EVENT(0xc5, 0x0),
+	[PSEUDO_ARCH_REFERENCE_CYCLES] = ARCH_EVENT(0xa4, 0x1),
+};
+
+/* mapping between fixed pmc index and intel_arch_events array */
+static const int fixed_pmc_events[] = {
+	[0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
+	[1] = INTEL_ARCH_CPU_CYCLES,
+	[2] = PSEUDO_ARCH_REFERENCE_CYCLES,
+};
+
+enum amd_pmu_k7_events {
+	AMD_ZEN_CORE_CYCLES,
+	AMD_ZEN_INSTRUCTIONS,
+	AMD_ZEN_BRANCHES,
+	AMD_ZEN_BRANCH_MISSES,
+};
+
+static const uint64_t amd_arch_events[] = {
+	[AMD_ZEN_CORE_CYCLES] = ARCH_EVENT(0x76, 0x00),
+	[AMD_ZEN_INSTRUCTIONS] = ARCH_EVENT(0xc0, 0x00),
+	[AMD_ZEN_BRANCHES] = ARCH_EVENT(0xc2, 0x00),
+	[AMD_ZEN_BRANCH_MISSES] = ARCH_EVENT(0xc3, 0x00),
+};
+
+static inline bool arch_event_is_supported(struct kvm_vcpu *vcpu,
+					   uint8_t arch_event)
+{
+	struct kvm_cpuid_entry2 *entry;
+
+	entry = vcpu_get_cpuid_entry(vcpu, 0xa);
+
+	return !(entry->ebx & BIT_ULL(arch_event)) &&
+	       (kvm_cpuid_property(vcpu->cpuid,
+			X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) > arch_event);
+}
+
+static inline bool fixed_counter_is_supported(struct kvm_vcpu *vcpu,
+					      uint8_t fixed_counter_idx)
+{
+	struct kvm_cpuid_entry2 *entry;
+
+	entry = vcpu_get_cpuid_entry(vcpu, 0xa);
+
+	return (entry->ecx & BIT_ULL(fixed_counter_idx) ||
+		(kvm_cpuid_property(vcpu->cpuid, X86_PROPERTY_PMU_NR_FIXED_COUNTERS) >
+		 fixed_counter_idx));
+}
+
+#endif /* SELFTEST_KVM_PMU_H */
--
2.39.3
On 14/8/2023 7:50 pm, Jinrong Liang wrote:
> +#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18)
> +#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19)
> +#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20)
> +#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21)
> +#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22)
> +#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23)
> +#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24)

Could you write more test cases to cover all EVENTSEL bits, including the ENABLE bit?
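
Something along these lines might be a starting point, e.g. (a rough, untested sketch of a guest-side ENABLE check; it assumes a v2+ Intel vPMU, a NUM_BRANCHES constant, and the wrmsr()/rdmsr()/GUEST_ASSERT*() helpers the selftests already provide):

static void guest_test_eventsel_enable(void)
{
	/* Count OS-mode retired instructions on GP counter 0. */
	uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
			    intel_arch_events[INTEL_ARCH_INSTRUCTIONS_RETIRED];

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(0));

	/* With ENABLE clear, the counter must not count. */
	wrmsr(MSR_P6_PERFCTR0, 0);
	wrmsr(MSR_P6_EVNTSEL0, eventsel);
	__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
	GUEST_ASSERT_EQ(rdmsr(MSR_P6_PERFCTR0), 0);

	/* With ENABLE set, the measured loop must be counted. */
	wrmsr(MSR_P6_PERFCTR0, 0);
	wrmsr(MSR_P6_EVNTSEL0, eventsel | ARCH_PERFMON_EVENTSEL_ENABLE);
	__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
	GUEST_ASSERT(rdmsr(MSR_P6_PERFCTR0));
}

The other EVENTSEL bits could be exercised the same way, checking that the count goes up, stays at zero, or that the write faults, as appropriate for each bit.
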
Like Xu <like.xu.linux@gmail.com> wrote on Mon, Aug 21, 2023 at 16:56:
>
> On 14/8/2023 7:50 pm, Jinrong Liang wrote:
> > +#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18)
> > +#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19)
> > +#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20)
> > +#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21)
> > +#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22)
> > +#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23)
> > +#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24)
>
> Could you write more test cases to cover all EVENTSEL bits, including the ENABLE bit?

I am more than willing to write additional test cases to cover all EVENTSEL bits, including the ENABLE bit.

If you have any specific suggestions or scenarios you'd like me to cover in the new test cases, please feel free to share them. I am open to any ideas that can further improve the coverage and quality of our selftests.

Thanks
On Mon, Aug 14, 2023, Jinrong Liang wrote:
> +static const uint64_t intel_arch_events[] = {
Hmm, rather than have a bunch of static arrays, I think it makes sense to go ahead
and add lib/pmu.c. Hmm, and this should probably be further namespaced, e.g.
intel_pmu_arch_events
> + [INTEL_ARCH_CPU_CYCLES] = ARCH_EVENT(0x3c, 0x0),
> + [INTEL_ARCH_INSTRUCTIONS_RETIRED] = ARCH_EVENT(0xc0, 0x0),
> + [INTEL_ARCH_REFERENCE_CYCLES] = ARCH_EVENT(0x3c, 0x1),
> + [INTEL_ARCH_LLC_REFERENCES] = ARCH_EVENT(0x2e, 0x4f),
> + [INTEL_ARCH_LLC_MISSES] = ARCH_EVENT(0x2e, 0x41),
> + [INTEL_ARCH_BRANCHES_RETIRED] = ARCH_EVENT(0xc4, 0x0),
> + [INTEL_ARCH_BRANCHES_MISPREDICTED] = ARCH_EVENT(0xc5, 0x0),
> + [PSEUDO_ARCH_REFERENCE_CYCLES] = ARCH_EVENT(0xa4, 0x1),
> +};
> +
> +/* mapping between fixed pmc index and intel_arch_events array */
> +static const int fixed_pmc_events[] = {
Please be consistent with names (even if KVM itself may be anything but). KVM
gets away with sloppiness because the arrays are only visible to pmu_intel.c,
but that's not the case here.
intel_pmu_fixed_pmc_events?
> + [0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
> + [1] = INTEL_ARCH_CPU_CYCLES,
> + [2] = PSEUDO_ARCH_REFERENCE_CYCLES,
> +};
> +
> +enum amd_pmu_k7_events {
> + AMD_ZEN_CORE_CYCLES,
> + AMD_ZEN_INSTRUCTIONS,
> + AMD_ZEN_BRANCHES,
> + AMD_ZEN_BRANCH_MISSES,
> +};
> +
> +static const uint64_t amd_arch_events[] = {
And then amd_pmu_arch_events.
> + [AMD_ZEN_CORE_CYCLES] = ARCH_EVENT(0x76, 0x00),
> + [AMD_ZEN_INSTRUCTIONS] = ARCH_EVENT(0xc0, 0x00),
> + [AMD_ZEN_BRANCHES] = ARCH_EVENT(0xc2, 0x00),
> + [AMD_ZEN_BRANCH_MISSES] = ARCH_EVENT(0xc3, 0x00),
> +};
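
I.e. roughly this split (sketch only, exact layout TBD), with pmu.h carrying the declarations and lib/x86_64/pmu.c the definitions:

/* include/x86_64/pmu.h */
extern const uint64_t intel_pmu_arch_events[];
extern const int intel_pmu_fixed_pmc_events[];
extern const uint64_t amd_pmu_arch_events[];

/* lib/x86_64/pmu.c */
#include "pmu.h"

const uint64_t intel_pmu_arch_events[] = {
	[INTEL_ARCH_CPU_CYCLES] = ARCH_EVENT(0x3c, 0x0),
	[INTEL_ARCH_INSTRUCTIONS_RETIRED] = ARCH_EVENT(0xc0, 0x0),
	[INTEL_ARCH_REFERENCE_CYCLES] = ARCH_EVENT(0x3c, 0x1),
	[INTEL_ARCH_LLC_REFERENCES] = ARCH_EVENT(0x2e, 0x4f),
	[INTEL_ARCH_LLC_MISSES] = ARCH_EVENT(0x2e, 0x41),
	[INTEL_ARCH_BRANCHES_RETIRED] = ARCH_EVENT(0xc4, 0x0),
	[INTEL_ARCH_BRANCHES_MISPREDICTED] = ARCH_EVENT(0xc5, 0x0),
	[PSEUDO_ARCH_REFERENCE_CYCLES] = ARCH_EVENT(0xa4, 0x1),
};

/* intel_pmu_fixed_pmc_events and amd_pmu_arch_events move over the same way. */
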
> +
> +static inline bool arch_event_is_supported(struct kvm_vcpu *vcpu,
> + uint8_t arch_event)
Same namespace problem. And I'd put the "is" earlier so that it's clearly a
question and not a command, e.g. "is this arch event supported?" versus "this
arch event is supported".
So pmu_is_arch_event_supported()? Actually, no, you're reinventing the wheel.
You can query all of the Intel arch events from within the guest via this_pmu_has(),
e.g. see X86_PMU_FEATURE_BRANCH_INSNS_RETIRED. You just need a helper (or array)
to get from an arbitrary index to its associated feature.
And now that GUEST_ASSERT_EQ() prints values, you can just do
counter = _rdpmc(i);
GUEST_ASSERT_EQ(this_pmu_has(...), !!counter);
in guest_measure_loop() instead of funneling the counter into ucall and back to
the host.
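
E.g. fleshed out a bit (untested sketch; assumes the guest programs GP counter 0 on a v2+ vPMU and a NUM_BRANCHES constant):

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(0));
	wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
	      ARCH_PERFMON_EVENTSEL_OS |
	      intel_arch_events[INTEL_ARCH_BRANCHES_RETIRED]);
	__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));

	/* The count should be non-zero iff the arch event is enumerated. */
	GUEST_ASSERT_EQ(this_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED),
			!!_rdpmc(0));

For the other arch events, a small array mapping each INTEL_ARCH_* index to its X86_PMU_FEATURE_* counterpart (adding any defines that don't exist yet) keeps the guest code generic.
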
> +{
> + struct kvm_cpuid_entry2 *entry;
> +
> + entry = vcpu_get_cpuid_entry(vcpu, 0xa);
> +
> + return !(entry->ebx & BIT_ULL(arch_event)) &&
> + (kvm_cpuid_property(vcpu->cpuid,
> + X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) > arch_event);
> +}
> +
> +static inline bool fixed_counter_is_supported(struct kvm_vcpu *vcpu,
More namespace problems. I don't love pmu_is_fixed_counter_supported(), because
that glosses over this operating on the vCPU, e.g. not KVM and not guest CPUID
(from within the guest).
And with a bit of massaging to the "anti-feature" framework, this_pmu_has() and
kvm_pmu_has() can be extended (unless I'm missing something).
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 07b980b8bec2..21f0c45c2ac6 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -287,12 +287,12 @@ struct kvm_x86_cpu_property {
  * architectural event is supported.
  */
 struct kvm_x86_pmu_feature {
-	struct kvm_x86_cpu_feature anti_feature;
+	struct kvm_x86_cpu_feature pmu_feature;
 };
 #define KVM_X86_PMU_FEATURE(name, __bit)				\
 ({									\
 	struct kvm_x86_pmu_feature feature = {				\
-		.anti_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit), \
+		.pmu_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit), \
 	};								\
 									\
 	feature;							\
@@ -690,10 +690,19 @@ static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
 
 static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
 {
-	uint32_t nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+	uint32_t nr_bits;
 
-	return nr_bits > feature.anti_feature.bit &&
-	       !this_cpu_has(feature.anti_feature);
+	if (feature.pmu_feature.reg == KVM_CPUID_EBX) {
+		nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+		return nr_bits > feature.pmu_feature.bit &&
+		       !this_cpu_has(feature.pmu_feature);
+	} else if (feature.pmu_feature.reg == KVM_CPUID_ECX) {
+		nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+		return nr_bits > feature.pmu_feature.bit ||
+		       this_cpu_has(feature.pmu_feature);
+	} else {
+		TEST_FAIL(...);
+	}
 }
 
 static __always_inline uint64_t this_cpu_supported_xcr0(void)
That doesn't give you a direct path to replacing fixed_counter_is_supported(),
but the usage in intel_test_oob_fixed_ctr() is bizarre and looks wrong, e.g. if
it's not supported, the test does nothing.
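
One way to get that path (strawman on top of the diff above, names made up) is to let KVM_X86_PMU_FEATURE() take the register, so that fixed counters can be expressed as ECX-based "features":

#define KVM_X86_PMU_FEATURE(__reg, __bit)				\
({									\
	struct kvm_x86_pmu_feature feature = {				\
		.pmu_feature = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit), \
	};								\
									\
	feature;							\
})

#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED	KVM_X86_PMU_FEATURE(EBX, 5)
#define X86_PMU_FEATURE_FIXED_COUNTER_0		KVM_X86_PMU_FEATURE(ECX, 0)

Then fixed_counter_is_supported(vcpu, 0) becomes this_pmu_has(X86_PMU_FEATURE_FIXED_COUNTER_0) in the guest, and kvm_pmu_has() can get the same treatment on the host side.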