[PATCH V3 4/4] KVM: selftests: Add nested page fault injection test

Add a test that exercises nested page fault injection during L2
execution. L2 executes I/O string instructions (OUTSB/INSB) that access
memory restricted in L1's nested page tables (NPT/EPT), triggering a
nested page fault that L0 must inject into L1.

The test supports both AMD SVM (NPF) and Intel VMX (EPT violation) and
verifies that:
  - The exit reason is an NPF/EPT violation
  - The access type and permission bits are correct
  - The faulting GPA is correct

Four test cases are implemented:
  - Unmap the final data page (final translation fault, OUTSB read)
  - Unmap a PT page (page walk fault, OUTSB read)
  - Write-protect the final data page (protection violation, INSB write)
  - Write-protect a PT page (protection violation on A/D update, OUTSB
    read)

Signed-off-by: Kevin Cheng <chengkev@google.com>
---
 tools/testing/selftests/kvm/Makefile.kvm      |   1 +
 .../selftests/kvm/x86/nested_npf_test.c       | 395 ++++++++++++++++++
 2 files changed, 396 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86/nested_npf_test.c

diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 3d372d78a275..9308e6100f27 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -94,6 +94,7 @@ TEST_GEN_PROGS_x86 += x86/nested_dirty_log_test
 TEST_GEN_PROGS_x86 += x86/nested_emulation_test
 TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
 TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
+TEST_GEN_PROGS_x86 += x86/nested_npf_test
 TEST_GEN_PROGS_x86 += x86/nested_set_state_test
 TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
 TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
diff --git a/tools/testing/selftests/kvm/x86/nested_npf_test.c b/tools/testing/selftests/kvm/x86/nested_npf_test.c
new file mode 100644
index 000000000000..7725e5dc3a38
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_npf_test.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+
+#define L2_GUEST_STACK_SIZE 64
+
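+/*
+ * VMX EPT violation exit qualification bits, per the Intel SDM's "Exit
+ * Qualification for EPT Violations" table.
+ */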
+#define EPT_VIOLATION_ACC_READ		BIT(0)
+#define EPT_VIOLATION_ACC_WRITE		BIT(1)
+#define EPT_VIOLATION_ACC_INSTR		BIT(2)
+#define EPT_VIOLATION_PROT_READ		BIT(3)
+#define EPT_VIOLATION_PROT_WRITE	BIT(4)
+#define EPT_VIOLATION_PROT_EXEC		BIT(5)
+#define EPT_VIOLATION_GVA_IS_VALID	BIT(7)
+#define EPT_VIOLATION_GVA_TRANSLATED	BIT(8)
+
+enum test_type {
+	TEST_FINAL_PAGE_UNMAPPED,	    /* Final data page not present */
+	TEST_PT_PAGE_UNMAPPED,		    /* Page table page not present */
+	TEST_FINAL_PAGE_WRITE_PROTECTED,    /* Final data page read-only */
+	TEST_PT_PAGE_WRITE_PROTECTED,	    /* Page table page read-only */
+};
+
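+/* Set by run_test() and synced into the guest before each test case. */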
+static vm_vaddr_t l2_test_page;
+static void (*l2_entry)(void);
+
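+/* Arbitrary I/O port and one distinct, arbitrary GVA per test case. */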
+#define TEST_IO_PORT 0x80
+#define TEST1_VADDR 0x8000000ULL
+#define TEST2_VADDR 0x10000000ULL
+#define TEST3_VADDR 0x18000000ULL
+#define TEST4_VADDR 0x20000000ULL
+
+/*
+ * L2 executes OUTS reading from l2_test_page, triggering a nested page
+ * fault on the read access.
+ */
+static void l2_guest_code_outs(void)
+{
+	asm volatile("outsb" ::"S"(l2_test_page), "d"(TEST_IO_PORT) : "memory");
+	GUEST_FAIL("L2 should not reach here");
+}
+
+/*
+ * L2 executes INS writing to l2_test_page, triggering a nested page
+ * fault on the write access.
+ */
+static void l2_guest_code_ins(void)
+{
+	asm volatile("insb" ::"D"(l2_test_page), "d"(TEST_IO_PORT) : "memory");
+	GUEST_FAIL("L2 should not reach here");
+}
+
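+/*
+ * Run L2 once, verify the resulting EPT violation's exit qualification
+ * bits, then verify the reported faulting GPA.
+ */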
+static void l1_vmx_code(struct vmx_pages *vmx, uint64_t expected_fault_gpa,
+			 uint64_t test_type)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	uint64_t exit_qual;
+
+	GUEST_ASSERT(vmx->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+	GUEST_ASSERT(load_vmcs(vmx));
+
+	prepare_vmcs(vmx, l2_entry, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_ASSERT(!vmlaunch());
+
+	/* Verify we got an EPT violation exit */
+	__GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION,
+		       "Expected EPT violation (0x%x), got 0x%lx",
+		       EXIT_REASON_EPT_VIOLATION,
+		       vmreadz(VM_EXIT_REASON));
+
+	exit_qual = vmreadz(EXIT_QUALIFICATION);
+
+	switch (test_type) {
+	case TEST_FINAL_PAGE_UNMAPPED:
+		/* Read access, final translation, page not present */
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_ACC_READ,
+			       "Expected ACC_READ set, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_GVA_IS_VALID,
+			       "Expected GVA_IS_VALID set, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_GVA_TRANSLATED,
+			       "Expected GVA_TRANSLATED set, exit_qual 0x%lx",
+			       exit_qual);
+		break;
+	case TEST_PT_PAGE_UNMAPPED:
+		/* Read access, page walk fault, page not present */
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_ACC_READ,
+			       "Expected ACC_READ set, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_GVA_IS_VALID,
+			       "Expected GVA_IS_VALID set, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(!(exit_qual & EPT_VIOLATION_GVA_TRANSLATED),
+			       "Expected GVA_TRANSLATED clear, exit_qual 0x%lx",
+			       exit_qual);
+		break;
+	case TEST_FINAL_PAGE_WRITE_PROTECTED:
+		/* Write access, final translation, page present but read-only */
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_ACC_WRITE,
+			       "Expected ACC_WRITE set, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_PROT_READ,
+			       "Expected PROT_READ set, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(!(exit_qual & EPT_VIOLATION_PROT_WRITE),
+			       "Expected PROT_WRITE clear, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_GVA_IS_VALID,
+			       "Expected GVA_IS_VALID set, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_GVA_TRANSLATED,
+			       "Expected GVA_TRANSLATED set, exit_qual 0x%lx",
+			       exit_qual);
+		break;
+	case TEST_PT_PAGE_WRITE_PROTECTED:
+		/* Write access (A/D update), page walk, page present but read-only */
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_ACC_WRITE,
+			       "Expected ACC_WRITE set, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_PROT_READ,
+			       "Expected PROT_READ set, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(!(exit_qual & EPT_VIOLATION_PROT_WRITE),
+			       "Expected PROT_WRITE clear, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(exit_qual & EPT_VIOLATION_GVA_IS_VALID,
+			       "Expected GVA_IS_VALID set, exit_qual 0x%lx",
+			       exit_qual);
+		__GUEST_ASSERT(!(exit_qual & EPT_VIOLATION_GVA_TRANSLATED),
+			       "Expected GVA_TRANSLATED clear, exit_qual 0x%lx",
+			       exit_qual);
+		break;
+	}
+
+	__GUEST_ASSERT(vmreadz(GUEST_PHYSICAL_ADDRESS) == expected_fault_gpa,
+		       "Expected guest_physical_address = 0x%lx, got 0x%lx",
+		       expected_fault_gpa,
+		       vmreadz(GUEST_PHYSICAL_ADDRESS));
+
+	GUEST_DONE();
+}
+
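+/*
+ * Run L2 once, verify the resulting #NPF's error code (exit_info_1),
+ * then verify the faulting GPA reported in exit_info_2.
+ */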
+static void l1_svm_code(struct svm_test_data *svm, uint64_t expected_fault_gpa,
+			 uint64_t test_type)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+	uint64_t exit_info_1;
+
+	generic_svm_setup(svm, l2_entry,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	run_guest(vmcb, svm->vmcb_gpa);
+
+	/* Verify we got an NPF exit */
+	__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_NPF,
+		       "Expected NPF exit (0x%x), got 0x%lx", SVM_EXIT_NPF,
+		       (unsigned long)vmcb->control.exit_code);
+
+	exit_info_1 = vmcb->control.exit_info_1;
+
+	switch (test_type) {
+	case TEST_FINAL_PAGE_UNMAPPED:
+		/* Read access, final translation, page not present */
+		__GUEST_ASSERT(exit_info_1 & PFERR_GUEST_FINAL_MASK,
+			       "Expected GUEST_FINAL set, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		__GUEST_ASSERT(!(exit_info_1 & PFERR_GUEST_PAGE_MASK),
+			       "Expected GUEST_PAGE clear, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		__GUEST_ASSERT(!(exit_info_1 & PFERR_PRESENT_MASK),
+			       "Expected PRESENT clear, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		break;
+	case TEST_PT_PAGE_UNMAPPED:
+		/* Read access, page walk fault, page not present */
+		__GUEST_ASSERT(exit_info_1 & PFERR_GUEST_PAGE_MASK,
+			       "Expected GUEST_PAGE set, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		__GUEST_ASSERT(!(exit_info_1 & PFERR_GUEST_FINAL_MASK),
+			       "Expected GUEST_FINAL clear, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		__GUEST_ASSERT(!(exit_info_1 & PFERR_PRESENT_MASK),
+			       "Expected PRESENT clear, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		break;
+	case TEST_FINAL_PAGE_WRITE_PROTECTED:
+		/* Write access, final translation, page present but read-only */
+		__GUEST_ASSERT(exit_info_1 & PFERR_GUEST_FINAL_MASK,
+			       "Expected GUEST_FINAL set, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		__GUEST_ASSERT(!(exit_info_1 & PFERR_GUEST_PAGE_MASK),
+			       "Expected GUEST_PAGE clear, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		__GUEST_ASSERT(exit_info_1 & PFERR_PRESENT_MASK,
+			       "Expected PRESENT set, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		__GUEST_ASSERT(exit_info_1 & PFERR_WRITE_MASK,
+			       "Expected WRITE set, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		break;
+	case TEST_PT_PAGE_WRITE_PROTECTED:
+		/* Write access (A/D update), page walk, page present but read-only */
+		__GUEST_ASSERT(exit_info_1 & PFERR_GUEST_PAGE_MASK,
+			       "Expected GUEST_PAGE set, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		__GUEST_ASSERT(!(exit_info_1 & PFERR_GUEST_FINAL_MASK),
+			       "Expected GUEST_FINAL clear, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		__GUEST_ASSERT(exit_info_1 & PFERR_PRESENT_MASK,
+			       "Expected PRESENT set, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		__GUEST_ASSERT(exit_info_1 & PFERR_WRITE_MASK,
+			       "Expected WRITE set, exit_info_1 0x%lx",
+			       (unsigned long)exit_info_1);
+		break;
+	}
+
+	__GUEST_ASSERT(vmcb->control.exit_info_2 == expected_fault_gpa,
+		       "Expected exit_info_2 = 0x%lx, got 0x%lx",
+		       expected_fault_gpa,
+		       (unsigned long)vmcb->control.exit_info_2);
+
+	GUEST_DONE();
+}
+
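+/* Common L1 entry point; dispatch on whichever vendor feature is present. */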
+static void l1_guest_code(void *data, uint64_t expected_fault_gpa,
+			  uint64_t test_type)
+{
+	if (this_cpu_has(X86_FEATURE_VMX))
+		l1_vmx_code(data, expected_fault_gpa, test_type);
+	else
+		l1_svm_code(data, expected_fault_gpa, test_type);
+}
+
+/* Returns the GPA of the PT page that maps @vaddr. */
+static uint64_t get_pt_gpa_for_vaddr(struct kvm_vm *vm, uint64_t vaddr)
+{
+	uint64_t *pte;
+
+	pte = vm_get_pte(vm, vaddr);
+	TEST_ASSERT(pte && (*pte & 0x1), "PTE not present for vaddr 0x%lx",
+		    (unsigned long)vaddr);
+
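+	/* Mask the PTE's HVA down to the base of its page table page. */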
+	return addr_hva2gpa(vm, (void *)((uint64_t)pte & ~0xFFFULL));
+}
+
+static void run_test(enum test_type type)
+{
+	vm_paddr_t expected_fault_gpa;
+	vm_vaddr_t nested_gva;
+
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+	vm_enable_tdp(vm);
+
+	if (kvm_cpu_has(X86_FEATURE_VMX))
+		vcpu_alloc_vmx(vm, &nested_gva);
+	else
+		vcpu_alloc_svm(vm, &nested_gva);
+
+	switch (type) {
+	case TEST_FINAL_PAGE_UNMAPPED:
+		/*
+		 * Unmap the final data page from NPT/EPT. The guest page
+		 * table walk succeeds, but the final GPA->HPA translation
+		 * fails. L2 reads from the page via OUTS.
+		 */
+		l2_entry = l2_guest_code_outs;
+		l2_test_page = vm_vaddr_alloc(vm, vm->page_size, TEST1_VADDR);
+		expected_fault_gpa = addr_gva2gpa(vm, l2_test_page);
+		break;
+	case TEST_PT_PAGE_UNMAPPED:
+		/*
+		 * Unmap a page table page from NPT/EPT. The hardware page
+		 * table walk fails when translating the PT page's GPA
+		 * through NPT/EPT. L2 reads from the page via OUTS.
+		 */
+		l2_entry = l2_guest_code_outs;
+		l2_test_page = vm_vaddr_alloc(vm, vm->page_size, TEST2_VADDR);
+		expected_fault_gpa = get_pt_gpa_for_vaddr(vm, l2_test_page);
+		break;
+	case TEST_FINAL_PAGE_WRITE_PROTECTED:
+		/*
+		 * Write-protect the final data page in NPT/EPT.  The page
+		 * is present and readable, but not writable.  L2 writes to
+		 * the page via INS, triggering a protection violation.
+		 */
+		l2_entry = l2_guest_code_ins;
+		l2_test_page = vm_vaddr_alloc(vm, vm->page_size, TEST3_VADDR);
+		expected_fault_gpa = addr_gva2gpa(vm, l2_test_page);
+		break;
+	case TEST_PT_PAGE_WRITE_PROTECTED:
+		/*
+		 * Write-protect a page table page in NPT/EPT.  The page is
+		 * present and readable, but not writable.  The guest page
+		 * table walk needs write access to set A/D bits, so it
+		 * triggers a protection violation on the PT page.
+		 * L2 reads from the page via OUTS.
+		 */
+		l2_entry = l2_guest_code_outs;
+		l2_test_page = vm_vaddr_alloc(vm, vm->page_size, TEST4_VADDR);
+		expected_fault_gpa = get_pt_gpa_for_vaddr(vm, l2_test_page);
+		break;
+	}
+
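+	/*
+	 * Identity map guest memory in the TDP (NPT/EPT) page tables, then
+	 * corrupt the entry for the target GPA: strip write access for the
+	 * protection tests, or all access for the unmapped tests.
+	 */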
+	tdp_identity_map_default_memslots(vm);
+
+	if (type == TEST_FINAL_PAGE_WRITE_PROTECTED ||
+	    type == TEST_PT_PAGE_WRITE_PROTECTED)
+		*tdp_get_pte(vm, expected_fault_gpa) &= ~PTE_WRITABLE_MASK(&vm->stage2_mmu);
+	else
+		*tdp_get_pte(vm, expected_fault_gpa) &= ~(PTE_PRESENT_MASK(&vm->stage2_mmu) |
+							   PTE_READABLE_MASK(&vm->stage2_mmu) |
+							   PTE_WRITABLE_MASK(&vm->stage2_mmu) |
+							   PTE_EXECUTABLE_MASK(&vm->stage2_mmu));
+
+	sync_global_to_guest(vm, l2_entry);
+	sync_global_to_guest(vm, l2_test_page);
+	vcpu_args_set(vcpu, 3, nested_gva, expected_fault_gpa, (uint64_t)type);
+
+	/*
+	 * For the INS-based write test, KVM emulates the instruction and
+	 * first reads from the I/O port, which exits to userspace.
+	 * Re-enter the guest so emulation can proceed to the memory
+	 * write, where the nested page fault is triggered.
+	 */
+	for (;;) {
+		vcpu_run(vcpu);
+
+		if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+		    vcpu->run->io.port == TEST_IO_PORT &&
+		    vcpu->run->io.direction == KVM_EXIT_IO_IN) {
+			continue;
+		}
+		break;
+	}
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+	default:
+		TEST_FAIL("Unexpected exit reason: %d", vcpu->run->exit_reason);
+	}
+
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM));
+	TEST_REQUIRE(kvm_cpu_has_tdp());
+
+	run_test(TEST_FINAL_PAGE_UNMAPPED);
+	run_test(TEST_PT_PAGE_UNMAPPED);
+	run_test(TEST_FINAL_PAGE_WRITE_PROTECTED);
+	run_test(TEST_PT_PAGE_WRITE_PROTECTED);
+
+	return 0;
+}
-- 
2.53.0.851.ga537e3e6e9-goog