[PATCH v2 6/6] KVM: x86: selftests: Test PerfMonV2

Colton Lewis posted 6 patches 1 year, 4 months ago
[PATCH v2 6/6] KVM: x86: selftests: Test PerfMonV2
Posted by Colton Lewis 1 year, 4 months ago
Test PerfMonV2, which defines global registers to enable multiple
performance counters with a single MSR write, in its own function.

If the feature is available, ensure the global control register has
the ability to start and stop the performance counters and the global
status register correctly flags an overflow by the associated counter.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
---
 .../selftests/kvm/x86_64/pmu_counters_test.c  | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
index cf2941cc7c4c..a90df8b67a19 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -763,10 +763,63 @@ static void guest_test_core_events(void)
 	}
 }
 
+static void guest_test_perfmon_v2(void)
+{
+	uint64_t i;
+	uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+		ARCH_PERFMON_EVENTSEL_ENABLE |
+		AMD_ZEN_CORE_CYCLES;
+	bool core_ext = this_cpu_has(X86_FEATURE_PERF_CTR_EXT_CORE);
+	uint64_t sel_msr_base = core_ext ? MSR_F15H_PERF_CTL : MSR_K7_EVNTSEL0;
+	uint64_t cnt_msr_base = core_ext ? MSR_F15H_PERF_CTR : MSR_K7_PERFCTR0;
+	uint64_t msr_step = core_ext ? 2 : 1;
+	uint8_t nr_counters = guest_nr_core_counters();
+	bool perfmon_v2 = this_cpu_has(X86_FEATURE_PERFMON_V2);
+	uint64_t sel_msr;
+	uint64_t cnt_msr;
+
+	if (!perfmon_v2)
+		return;
+
+	for (i = 0; i < nr_counters; i++) {
+		sel_msr = sel_msr_base + msr_step * i;
+		cnt_msr = cnt_msr_base + msr_step * i;
+
+		/* Ensure count stays 0 when global register disables counter. */
+		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
+		wrmsr(sel_msr, eventsel);
+		wrmsr(cnt_msr, 0);
+		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+		GUEST_ASSERT(!_rdpmc(i));
+
+		/* Ensure counter is >0 when global register enables counter. */
+		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, BIT_ULL(i));
+		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
+		GUEST_ASSERT(_rdpmc(i));
+
+		/* Ensure global status register flags a counter overflow. */
+		wrmsr(cnt_msr, -1);
+		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, 0xff);
+		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, BIT_ULL(i));
+		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
+		GUEST_ASSERT(rdmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS) &
+			     BIT_ULL(i));
+
+		/* Ensure global status register flag is cleared correctly. */
+		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, BIT_ULL(i));
+		GUEST_ASSERT(!(rdmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS) &
+			     BIT_ULL(i)));
+	}
+}
+
+
 static void guest_test_core_counters(void)
 {
 	guest_test_rdwr_core_counters();
 	guest_test_core_events();
+	guest_test_perfmon_v2();
 	GUEST_DONE();
 }
 
-- 
2.46.0.662.g92d0881bb0-goog
Re: [PATCH v2 6/6] KVM: x86: selftests: Test PerfMonV2
Posted by Sean Christopherson 1 year, 1 month ago
On Wed, Sep 18, 2024, Colton Lewis wrote:
> Test PerfMonV2, which defines global registers to enable multiple
> performance counters with a single MSR write, in its own function.
> 
> If the feature is available, ensure the global control register has
> the ability to start and stop the performance counters and the global
> status register correctly flags an overflow by the associated counter.
> 
> Signed-off-by: Colton Lewis <coltonlewis@google.com>
> ---
>  .../selftests/kvm/x86_64/pmu_counters_test.c  | 53 +++++++++++++++++++
>  1 file changed, 53 insertions(+)
> 
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> index cf2941cc7c4c..a90df8b67a19 100644
> --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> @@ -763,10 +763,63 @@ static void guest_test_core_events(void)
>  	}
>  }
>  
> +static void guest_test_perfmon_v2(void)
> +{
> +	uint64_t i;
> +	uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
> +		ARCH_PERFMON_EVENTSEL_ENABLE |
> +		AMD_ZEN_CORE_CYCLES;

Hmm.  I like the extra coverage, but I think the guts of this test belong in
common code, because the core logic is the same across Intel and AMD (I think),
only the MSRs are different.

Maybe a library helper that takes in the MSRs as parameters?  Not sure.

I suspect it'll take some back and forth to figure out how best to validate these
more "advanced" behaviors, so maybe skip this patch for the next version?  I.e.
land basic AMD coverage and then we can figure out how to test global control and
status.

> +	bool core_ext = this_cpu_has(X86_FEATURE_PERF_CTR_EXT_CORE);
> +	uint64_t sel_msr_base = core_ext ? MSR_F15H_PERF_CTL : MSR_K7_EVNTSEL0;
> +	uint64_t cnt_msr_base = core_ext ? MSR_F15H_PERF_CTR : MSR_K7_PERFCTR0;
> +	uint64_t msr_step = core_ext ? 2 : 1;
> +	uint8_t nr_counters = guest_nr_core_counters();
> +	bool perfmon_v2 = this_cpu_has(X86_FEATURE_PERFMON_V2);

Zero reason to capture this in a local variable.

> +	uint64_t sel_msr;
> +	uint64_t cnt_msr;
> +
> +	if (!perfmon_v2)
> +		return;
> +
> +	for (i = 0; i < nr_counters; i++) {
> +		sel_msr = sel_msr_base + msr_step * i;
> +		cnt_msr = cnt_msr_base + msr_step * i;
> +
> +		/* Ensure count stays 0 when global register disables counter. */
> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
> +		wrmsr(sel_msr, eventsel);
> +		wrmsr(cnt_msr, 0);
> +		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
> +		GUEST_ASSERT(!_rdpmc(i));
> +
> +		/* Ensure counter is >0 when global register enables counter. */
> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, BIT_ULL(i));
> +		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
> +		GUEST_ASSERT(_rdpmc(i));
> +
> +		/* Ensure global status register flags a counter overflow. */
> +		wrmsr(cnt_msr, -1);
> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, 0xff);
> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, BIT_ULL(i));
> +		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
> +		GUEST_ASSERT(rdmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS) &
> +			     BIT_ULL(i));
> +
> +		/* Ensure global status register flag is cleared correctly. */
> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, BIT_ULL(i));
> +		GUEST_ASSERT(!(rdmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS) &
> +			     BIT_ULL(i)));
> +	}
> +}
> +
> +
>  static void guest_test_core_counters(void)
>  {
>  	guest_test_rdwr_core_counters();
>  	guest_test_core_events();
> +	guest_test_perfmon_v2();
>  	GUEST_DONE();
>  }
>  
> -- 
> 2.46.0.662.g92d0881bb0-goog
>
Re: [PATCH v2 6/6] KVM: x86: selftests: Test PerfMonV2
Posted by Colton Lewis 1 year ago
Sean Christopherson <seanjc@google.com> writes:

> On Wed, Sep 18, 2024, Colton Lewis wrote:
>> Test PerfMonV2, which defines global registers to enable multiple
>> performance counters with a single MSR write, in its own function.

>> If the feature is available, ensure the global control register has
>> the ability to start and stop the performance counters and the global
>> status register correctly flags an overflow by the associated counter.

>> Signed-off-by: Colton Lewis <coltonlewis@google.com>
>> ---
>>   .../selftests/kvm/x86_64/pmu_counters_test.c  | 53 +++++++++++++++++++
>>   1 file changed, 53 insertions(+)

>> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c  
>> b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
>> index cf2941cc7c4c..a90df8b67a19 100644
>> --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
>> +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
>> @@ -763,10 +763,63 @@ static void guest_test_core_events(void)
>>   	}
>>   }

>> +static void guest_test_perfmon_v2(void)
>> +{
>> +	uint64_t i;
>> +	uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
>> +		ARCH_PERFMON_EVENTSEL_ENABLE |
>> +		AMD_ZEN_CORE_CYCLES;

> Hmm.  I like the extra coverage, but I think the guts of this test belong
> in
> common code, because the core logic is the same across Intel and AMD (I  
> think),
> only the MSRs are different.

> Maybe a library helper that takes in the MSRs as parameters?  Not sure.

I'll explore some other options

> I suspect it'll take some back and forth to figure out how best to  
> validate these
> more "advanced" behaviors, so maybe skip this patch for the next  
> version?  I.e.
> land basic AMD coverage and then we can figure out how to test global  
> control and
> status.

Sure

>> +	bool core_ext = this_cpu_has(X86_FEATURE_PERF_CTR_EXT_CORE);
>> +	uint64_t sel_msr_base = core_ext ? MSR_F15H_PERF_CTL : MSR_K7_EVNTSEL0;
>> +	uint64_t cnt_msr_base = core_ext ? MSR_F15H_PERF_CTR : MSR_K7_PERFCTR0;
>> +	uint64_t msr_step = core_ext ? 2 : 1;
>> +	uint8_t nr_counters = guest_nr_core_counters();
>> +	bool perfmon_v2 = this_cpu_has(X86_FEATURE_PERFMON_V2);

> Zero reason to capture this in a local variable.

Will delete

>> +	uint64_t sel_msr;
>> +	uint64_t cnt_msr;
>> +
>> +	if (!perfmon_v2)
>> +		return;
>> +
>> +	for (i = 0; i < nr_counters; i++) {
>> +		sel_msr = sel_msr_base + msr_step * i;
>> +		cnt_msr = cnt_msr_base + msr_step * i;
>> +
>> +		/* Ensure count stays 0 when global register disables counter. */
>> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
>> +		wrmsr(sel_msr, eventsel);
>> +		wrmsr(cnt_msr, 0);
>> +		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
>> +		GUEST_ASSERT(!_rdpmc(i));
>> +
>> +		/* Ensure counter is >0 when global register enables counter. */
>> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, BIT_ULL(i));
>> +		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
>> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
>> +		GUEST_ASSERT(_rdpmc(i));
>> +
>> +		/* Ensure global status register flags a counter overflow. */
>> +		wrmsr(cnt_msr, -1);
>> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, 0xff);
>> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, BIT_ULL(i));
>> +		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
>> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
>> +		GUEST_ASSERT(rdmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS) &
>> +			     BIT_ULL(i));
>> +
>> +		/* Ensure global status register flag is cleared correctly. */
>> +		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, BIT_ULL(i));
>> +		GUEST_ASSERT(!(rdmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS) &
>> +			     BIT_ULL(i)));
>> +	}
>> +}
>> +
>> +
>>   static void guest_test_core_counters(void)
>>   {
>>   	guest_test_rdwr_core_counters();
>>   	guest_test_core_events();
>> +	guest_test_perfmon_v2();
>>   	GUEST_DONE();
>>   }

>> --
>> 2.46.0.662.g92d0881bb0-goog