For a VCPU thread pinned to a single LPU, verify that interleaved host
and guest reads of IA32_[AM]PERF return strictly increasing values when
APERFMPERF exiting is disabled.
Signed-off-by: Jim Mattson <jmattson@google.com>
---
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../testing/selftests/kvm/include/kvm_util.h | 2 +
tools/testing/selftests/kvm/lib/kvm_util.c | 17 +++
.../selftests/kvm/x86/aperfmperf_test.c | 132 ++++++++++++++++++
4 files changed, 152 insertions(+)
create mode 100644 tools/testing/selftests/kvm/x86/aperfmperf_test.c
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 3e786080473d..8d42a3bd0dd8 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -131,6 +131,7 @@ TEST_GEN_PROGS_x86 += x86/amx_test
TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += x86/aperfmperf_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 93013564428b..43a1bef10ec0 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -1158,4 +1158,6 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);

uint32_t guest_get_vcpuid(void);

+int pin_task_to_one_cpu(void);
+
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 5649cf2f40e8..b6c707ab92d7 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -10,6 +10,7 @@
#include "ucall_common.h"

#include <assert.h>
+#include <pthread.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/resource.h>
@@ -2321,3 +2322,19 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
pg = paddr >> vm->page_shift;
return sparsebit_is_set(region->protected_phy_pages, pg);
}
+
+int pin_task_to_one_cpu(void)
+{
+ int cpu = sched_getcpu();
+ cpu_set_t cpuset;
+ int rc;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ rc = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+ TEST_ASSERT(rc == 0, "%s: Can't set thread affinity", __func__);
+
+ return cpu;
+}
+
diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
new file mode 100644
index 000000000000..64d976156693
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for KVM_X86_DISABLE_EXITS_APERFMPERF
+ *
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test the ability to disable VM-exits for rdmsr of IA32_APERF and
+ * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs
+ * return the host's values.
+ *
+ * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <asm/msr-index.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define NUM_ITERATIONS 100
+
+static int open_dev_msr(int cpu)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
+ return open_path_or_exit(path, O_RDONLY);
+}
+
+static uint64_t read_dev_msr(int msr_fd, uint32_t msr)
+{
+ uint64_t data;
+ ssize_t rc;
+
+ rc = pread(msr_fd, &data, sizeof(data), msr);
+ TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr);
+
+ return data;
+}
+
+static void guest_code(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_ITERATIONS; i++)
+ GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF));
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ uint64_t host_aperf_before, host_mperf_before;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int msr_fd;
+ int cpu;
+ int i;
+
+ cpu = pin_task_to_one_cpu();
+
+ msr_fd = open_dev_msr(cpu);
+
+ /*
+ * This test requires a non-standard VM initialization, because
+ * KVM_ENABLE_CAP cannot be used on a VM file descriptor after
+ * a VCPU has been created.
+ */
+ vm = vm_create(1);
+
+ TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) &
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+ for (i = 0; i < NUM_ITERATIONS; i++) {
+ uint64_t host_aperf_after, host_mperf_after;
+ uint64_t guest_aperf, guest_mperf;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ guest_aperf = uc.args[0];
+ guest_mperf = uc.args[1];
+
+ host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+ TEST_ASSERT(host_aperf_before < guest_aperf,
+ "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_aperf_before, guest_aperf);
+ TEST_ASSERT(guest_aperf < host_aperf_after,
+ "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_aperf, host_aperf_after);
+ TEST_ASSERT(host_mperf_before < guest_mperf,
+ "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_mperf_before, guest_mperf);
+ TEST_ASSERT(guest_mperf < host_mperf_after,
+ "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_mperf, host_mperf_after);
+
+ host_aperf_before = host_aperf_after;
+ host_mperf_before = host_mperf_after;
+
+ break;
+ }
+ }
+
+ kvm_vm_free(vm);
+ close(msr_fd);
+
+ return 0;
+}
--
2.49.0.1204.g71687c7c1d-goog

On Fri, May 30, 2025, Jim Mattson wrote:

[...]

> +int pin_task_to_one_cpu(void)
> +{
> + int cpu = sched_getcpu();
> + cpu_set_t cpuset;
> + int rc;
> +
> + CPU_ZERO(&cpuset);
> + CPU_SET(cpu, &cpuset);
> +
> + rc = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
> + TEST_ASSERT(rc == 0, "%s: Can't set thread affinity", __func__);
> +
> + return cpu;
> +}
There's already kvm_pin_this_task_to_pcpu(), which is *almost* what is needed
here. I'll slot in a patch in v5 to expand that into a set of APIs, along with
a patch to convert the low-hanging fruit (arch_timer tests).
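
[For reference, a minimal sketch of how the new helper could be layered on
the existing API instead of duplicating the affinity logic.
kvm_pin_this_task_to_pcpu() is the existing selftests helper; the wrapper
name and shape below are hypothetical, not the actual v5 patch:

/*
 * Hypothetical sketch: pin the calling task to the pCPU it is
 * currently running on, reusing kvm_pin_this_task_to_pcpu(), which
 * asserts internally if sched_setaffinity() fails.
 */
static int pin_task_to_current_cpu(void)
{
	int cpu = sched_getcpu();

	TEST_ASSERT(cpu >= 0, "sched_getcpu() failed");
	kvm_pin_this_task_to_pcpu(cpu);
	return cpu;
}

The wrapper keeps the "pin to wherever I already am" semantics that the
aperfmperf test wants, while leaving the pin-to-a-given-pCPU primitive
available for tests that choose the CPU themselves.]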

On 5/31/2025 2:52 AM, Jim Mattson wrote:
> For a VCPU thread pinned to a single LPU, verify that interleaved host
> and guest reads of IA32_[AM]PERF return strictly increasing values when
> APERFMPERF exiting is disabled.

[...]

> + TEST_ASSERT(host_aperf_before < guest_aperf,
> + "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
> + host_aperf_before, guest_aperf);
> + TEST_ASSERT(guest_aperf < host_aperf_after,
> + "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
> + guest_aperf, host_aperf_after);
> + TEST_ASSERT(host_mperf_before < guest_mperf,
> + "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
> + host_mperf_before, guest_mperf);
> + TEST_ASSERT(guest_mperf < host_mperf_after,
> + "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
> + guest_mperf, host_mperf_after);

Should we consider the possible overflow case of these 2 MSRs although it
could be extremely rare? Thanks.

On Tue, Jun 10, 2025 at 1:42 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>
> On 5/31/2025 2:52 AM, Jim Mattson wrote:
> > For a VCPU thread pinned to a single LPU, verify that interleaved host
> > and guest reads of IA32_[AM]PERF return strictly increasing values when
> > APERFMPERF exiting is disabled.
> Should we consider the possible overflow case of these 2 MSRs although it
> could be extremely rare? Thanks.

Unless someone moves the MSRs forward, at current frequencies, the
machine will have to be up for more than 100 years. I'll be long dead
by then.

Note that frequency invariant scheduling doesn't accommodate overflow
either. If the MSRs overflow, frequency invariant scheduling is
disabled.

On 6/11/2025 12:59 AM, Jim Mattson wrote:
> On Tue, Jun 10, 2025 at 1:42 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>>
>> On 5/31/2025 2:52 AM, Jim Mattson wrote:
>>> For a VCPU thread pinned to a single LPU, verify that interleaved host
>>> and guest reads of IA32_[AM]PERF return strictly increasing values when
>>> APERFMPERF exiting is disabled.
>> Should we consider the possible overflow case of these 2 MSRs although it
>> could be extremely rare? Thanks.
> Unless someone moves the MSRs forward, at current frequencies, the
> machine will have to be up for more than 100 years. I'll be long dead
> by then.

😂

>
> Note that frequency invariant scheduling doesn't accommodate overflow
> either. If the MSRs overflow, frequency invariant scheduling is
> disabled.

Agree.
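
[As a back-of-the-envelope check of the 100-year figure above — the 5 GHz
effective count rate below is an assumption, chosen as an aggressive upper
bound; real effective rates are lower, which only lengthens the horizon:

/*
 * Sketch only: estimate how long a free-running 64-bit counter takes
 * to wrap, assuming a 5 GHz effective count rate.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const double hz = 5e9;				/* assumed rate */
	const double secs = (double)UINT64_MAX / hz;	/* ~3.7e9 seconds */

	/* Prints roughly 117 years, consistent with "more than 100". */
	printf("~%.0f years\n", secs / (365.25 * 24 * 60 * 60));
	return 0;
}

At any plausible effective frequency, overflow is far outside the lifetime
of the test, so ignoring it here matches what the kernel itself does.]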