[PATCH v3 10/16] KVM: selftests: Reuse virt mapping functions for nested EPTs
Posted by Yosry Ahmed 2 months, 1 week ago
__tdp_pg_map() bears a lot of resemblance to __virt_pg_map(). The
main differences are:
- It uses the EPT struct overlay instead of the PTE masks.
- It always assumes 4-level EPTs.

To reuse __virt_pg_map(), initialize the PTE masks in nested MMU with
EPT PTE masks. EPTs have no 'present' or 'user' bits, so use the
'readable' bit instead like shadow_{present/user}_mask, ignoring the
fact that entries can be present and not readable if the CPU has
VMX_EPT_EXECUTE_ONLY_BIT.  This is simple and sufficient for testing.

Add an executable bitmask and update __virt_pg_map() and friends to set
the bit on newly created entries to match the EPT behavior. It's a nop
for x86 page tables.

Another benefit of reusing the code is having separate handling for
upper-level PTEs vs 4K PTEs, which avoids some quirks like setting the
large bit on a 4K PTE in the EPTs.

No functional change intended.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
---
 .../selftests/kvm/include/x86/processor.h     |   3 +
 .../testing/selftests/kvm/lib/x86/processor.c |  12 +-
 tools/testing/selftests/kvm/lib/x86/vmx.c     | 115 ++++--------------
 3 files changed, 33 insertions(+), 97 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index fb2b2e53d453..62e10b296719 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1447,6 +1447,7 @@ struct pte_masks {
 	uint64_t dirty;
 	uint64_t huge;
 	uint64_t nx;
+	uint64_t x;
 	uint64_t c;
 	uint64_t s;
 };
@@ -1464,6 +1465,7 @@ struct kvm_mmu {
 #define PTE_DIRTY_MASK(mmu) ((mmu)->pte_masks.dirty)
 #define PTE_HUGE_MASK(mmu) ((mmu)->pte_masks.huge)
 #define PTE_NX_MASK(mmu) ((mmu)->pte_masks.nx)
+#define PTE_X_MASK(mmu) ((mmu)->pte_masks.x)
 #define PTE_C_MASK(mmu) ((mmu)->pte_masks.c)
 #define PTE_S_MASK(mmu) ((mmu)->pte_masks.s)
 
@@ -1474,6 +1476,7 @@ struct kvm_mmu {
 #define pte_dirty(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
 #define pte_huge(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
 #define pte_nx(mmu, pte) (!!(*(pte) & PTE_NX_MASK(mmu)))
+#define pte_x(mmu, pte) (!!(*(pte) & PTE_X_MASK(mmu)))
 #define pte_c(mmu, pte) (!!(*(pte) & PTE_C_MASK(mmu)))
 #define pte_s(mmu, pte) (!!(*(pte) & PTE_S_MASK(mmu)))
 
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index bff75ff05364..8b0e17f8ca37 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -162,8 +162,7 @@ struct kvm_mmu *mmu_create(struct kvm_vm *vm, int pgtable_levels,
 	struct kvm_mmu *mmu = calloc(1, sizeof(*mmu));
 
 	TEST_ASSERT(mmu, "-ENOMEM when allocating MMU");
-	if (pte_masks)
-		mmu->pte_masks = *pte_masks;
+	mmu->pte_masks = *pte_masks;
 	mmu->root_gpa = vm_alloc_page_table(vm);
 	mmu->pgtable_levels = pgtable_levels;
 	return mmu;
@@ -179,6 +178,7 @@ static void mmu_init(struct kvm_vm *vm)
 		.dirty		=	BIT_ULL(6),
 		.huge		=	BIT_ULL(7),
 		.nx		=	BIT_ULL(63),
+		.x		=	0,
 		.c		=	vm->arch.c_bit,
 		.s		=	vm->arch.s_bit,
 	};
@@ -225,7 +225,7 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
 	paddr = vm_untag_gpa(vm, paddr);
 
 	if (!pte_present(mmu, pte)) {
-		*pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu);
+		*pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu) | PTE_X_MASK(mmu);
 		if (current_level == target_level)
 			*pte |= PTE_HUGE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
 		else
@@ -271,6 +271,9 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
 	TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
 		    "Unexpected bits in paddr: %lx", paddr);
 
+	TEST_ASSERT(!PTE_X_MASK(mmu) || !PTE_NX_MASK(mmu),
+		    "X and NX bit masks cannot be used simultaneously");
+
 	/*
 	 * Allocate upper level page tables, if not already present.  Return
 	 * early if a hugepage was created.
@@ -288,7 +291,8 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
 	pte = virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
 	TEST_ASSERT(!pte_present(mmu, pte),
 		    "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
-	*pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
+	*pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu) | PTE_X_MASK(mmu)
+		| (paddr & PHYSICAL_PAGE_MASK);
 
 	/*
 	 * Neither SEV nor TDX supports shared page tables, so only the final
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index a909fad57fd5..0cba31cae896 100644
--- a/tools/testing/selftests/kvm/lib/x86/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -25,21 +25,6 @@ bool enable_evmcs;
 struct hv_enlightened_vmcs *current_evmcs;
 struct hv_vp_assist_page *current_vp_assist;
 
-struct eptPageTableEntry {
-	uint64_t readable:1;
-	uint64_t writable:1;
-	uint64_t executable:1;
-	uint64_t memory_type:3;
-	uint64_t ignore_pat:1;
-	uint64_t page_size:1;
-	uint64_t accessed:1;
-	uint64_t dirty:1;
-	uint64_t ignored_11_10:2;
-	uint64_t address:40;
-	uint64_t ignored_62_52:11;
-	uint64_t suppress_ve:1;
-};
-
 int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
 {
 	uint16_t evmcs_ver;
@@ -58,12 +43,31 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
 
 void vm_enable_ept(struct kvm_vm *vm)
 {
+	struct pte_masks pte_masks;
+
 	TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
 	if (vm->arch.nested.mmu)
 		return;
 
+	/*
+	 * EPTs do not have 'present' or 'user' bits, instead bit 0 is the
+	 * 'readable' bit. In some cases, EPTs can be execute-only and an entry
+	 * is present but not readable. However, for the purposes of testing we
+	 * assume 'present' == 'user' == 'readable' for simplicity.
+	 */
+	pte_masks = (struct pte_masks){
+		.present	=	BIT_ULL(0),
+		.user		=	BIT_ULL(0),
+		.writable	=	BIT_ULL(1),
+		.x		=	BIT_ULL(2),
+		.accessed	=	BIT_ULL(5),
+		.dirty		=	BIT_ULL(6),
+		.huge		=	BIT_ULL(7),
+		.nx		=	0,
+	};
+
 	/* EPTP_PWL_4 is always used */
-	vm->arch.nested.mmu = mmu_create(vm, 4, NULL);
+	vm->arch.nested.mmu = mmu_create(vm, 4, &pte_masks);
 }
 
 /* Allocate memory regions for nested VMX tests.
@@ -372,82 +376,6 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
 	init_vmcs_guest_state(guest_rip, guest_rsp);
 }
 
-static void tdp_create_pte(struct kvm_vm *vm,
-			   struct eptPageTableEntry *pte,
-			   uint64_t nested_paddr,
-			   uint64_t paddr,
-			   int current_level,
-			   int target_level)
-{
-	if (!pte->readable) {
-		pte->writable = true;
-		pte->readable = true;
-		pte->executable = true;
-		pte->page_size = (current_level == target_level);
-		if (pte->page_size)
-			pte->address = paddr >> vm->page_shift;
-		else
-			pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
-	} else {
-		/*
-		 * Entry already present.  Assert that the caller doesn't want
-		 * a hugepage at this level, and that there isn't a hugepage at
-		 * this level.
-		 */
-		TEST_ASSERT(current_level != target_level,
-			    "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
-			    current_level, nested_paddr);
-		TEST_ASSERT(!pte->page_size,
-			    "Cannot create page table at level: %u, nested_paddr: 0x%lx",
-			    current_level, nested_paddr);
-	}
-}
-
-
-void __tdp_pg_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
-		  int target_level)
-{
-	const uint64_t page_size = PG_LEVEL_SIZE(target_level);
-	void *eptp_hva = addr_gpa2hva(vm, vm->arch.nested.mmu->root_gpa);
-	struct eptPageTableEntry *pt = eptp_hva, *pte;
-	uint16_t index;
-
-	TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
-		    "Unknown or unsupported guest mode: 0x%x", vm->mode);
-
-	TEST_ASSERT((nested_paddr >> 48) == 0,
-		    "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT",
-		    nested_paddr);
-	TEST_ASSERT((nested_paddr % page_size) == 0,
-		    "Nested physical address not on page boundary,\n"
-		    "  nested_paddr: 0x%lx page_size: 0x%lx",
-		    nested_paddr, page_size);
-	TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
-		    "Physical address beyond beyond maximum supported,\n"
-		    "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-		    paddr, vm->max_gfn, vm->page_size);
-	TEST_ASSERT((paddr % page_size) == 0,
-		    "Physical address not on page boundary,\n"
-		    "  paddr: 0x%lx page_size: 0x%lx",
-		    paddr, page_size);
-	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
-		    "Physical address beyond beyond maximum supported,\n"
-		    "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-		    paddr, vm->max_gfn, vm->page_size);
-
-	for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
-		index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
-		pte = &pt[index];
-
-		tdp_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
-
-		if (pte->page_size)
-			break;
-
-		pt = addr_gpa2hva(vm, pte->address * vm->page_size);
-	}
-}
-
 /*
  * Map a range of EPT guest physical addresses to the VM's physical address
  *
@@ -468,6 +396,7 @@ void __tdp_pg_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
 void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
 	       uint64_t size, int level)
 {
+	struct kvm_mmu *mmu = vm->arch.nested.mmu;
 	size_t page_size = PG_LEVEL_SIZE(level);
 	size_t npages = size / page_size;
 
@@ -475,7 +404,7 @@ void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
 
 	while (npages--) {
-		__tdp_pg_map(vm, nested_paddr, paddr, level);
+		__virt_pg_map(vm, mmu, nested_paddr, paddr, level);
 		nested_paddr += page_size;
 		paddr += page_size;
 	}
-- 
2.52.0.158.g65b55ccf14-goog
Re: [PATCH v3 10/16] KVM: selftests: Reuse virt mapping functions for nested EPTs
Posted by Sean Christopherson 1 month, 2 weeks ago
On Thu, Nov 27, 2025, Yosry Ahmed wrote:
> +	/*
> +	 * EPTs do not have 'present' or 'user' bits, instead bit 0 is the
> +	 * 'readable' bit. In some cases, EPTs can be execute-only and an entry
> +	 * is present but not readable. However, for the purposes of testing we
> +	 * assume 'present' == 'user' == 'readable' for simplicity.
> +	 */
> +	pte_masks = (struct pte_masks){
> +		.present	=	BIT_ULL(0),
> +		.user		=	BIT_ULL(0),
> +		.writable	=	BIT_ULL(1),
> +		.x		=	BIT_ULL(2),
> +		.accessed	=	BIT_ULL(5),
> +		.dirty		=	BIT_ULL(6),

Almost forgot, the Accessed and Dirty bits are wrong.  They are bits 8 and 9
respectively, not 5 and 6.  Amusingly (well, it's amusing *now*, it wasn't so
amusing at the time), I found that out when I couldn't get KVM to create a writable
SPTE on a read fault in the nested dirty log test :-)
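
For reference, with the A/D bits at their architectural positions the
initializer would look something like this (sketch only, same field layout
as this patch):

	pte_masks = (struct pte_masks){
		.present	=	BIT_ULL(0),
		.user		=	BIT_ULL(0),
		.writable	=	BIT_ULL(1),
		.x		=	BIT_ULL(2),
		.accessed	=	BIT_ULL(8),
		.dirty		=	BIT_ULL(9),
		.huge		=	BIT_ULL(7),
		.nx		=	0,
	};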
Re: [PATCH v3 10/16] KVM: selftests: Reuse virt mapping functions for nested EPTs
Posted by Yosry Ahmed 1 month, 2 weeks ago
On Tue, Dec 23, 2025 at 03:14:28PM -0800, Sean Christopherson wrote:
> On Thu, Nov 27, 2025, Yosry Ahmed wrote:
> > +	/*
> > +	 * EPTs do not have 'present' or 'user' bits, instead bit 0 is the
> > +	 * 'readable' bit. In some cases, EPTs can be execute-only and an entry
> > +	 * is present but not readable. However, for the purposes of testing we
> > +	 * assume 'present' == 'user' == 'readable' for simplicity.
> > +	 */
> > +	pte_masks = (struct pte_masks){
> > +		.present	=	BIT_ULL(0),
> > +		.user		=	BIT_ULL(0),
> > +		.writable	=	BIT_ULL(1),
> > +		.x		=	BIT_ULL(2),
> > +		.accessed	=	BIT_ULL(5),
> > +		.dirty		=	BIT_ULL(6),
> 
> Almost forgot, the Accessed and Dirty bits are wrong.  They are bits 8 and 9
> respectively, not 5 and 6.  Amusingly (well, it's amusing *now*, it wasn't so
> amusing at the time), I found that out when I couldn't get KVM to create a writable
> SPTE on a read fault in the nested dirty log test :-)

Instead of being a reasonable person and owning up to my mistake, I will
blame Intel for putting the bits there to begin with :P

(But seriously, sorry for such a dumb mistake)
Re: [PATCH v3 10/16] KVM: selftests: Reuse virt mapping functions for nested EPTs
Posted by Sean Christopherson 1 month, 2 weeks ago
On Thu, Nov 27, 2025, Yosry Ahmed wrote:
> __tdp_pg_map() bears a lot of resemblance to __virt_pg_map(). The
> main differences are:
> - It uses the EPT struct overlay instead of the PTE masks.
> - It always assumes 4-level EPTs.
> 
> To reuse __virt_pg_map(), initialize the PTE masks in nested MMU with
> EPT PTE masks. EPTs have no 'present' or 'user' bits, so use the
> 'readable' bit instead like shadow_{present/user}_mask, ignoring the
> fact that entries can be present and not readable if the CPU has
> VMX_EPT_EXECUTE_ONLY_BIT.  This is simple and sufficient for testing.

Ugh, no.  I am strongly against playing the same insane games KVM itself plays
with overloading protection/access bits.  There's no reason for selftests to do
the same, e.g. selftests aren't shadowing guest PTEs and doing permission checks
in hot paths and so don't need to multiplex a bunch of things into an inscrutable
(but performant!) system.

> Add an executable bitmask and update __virt_pg_map() and friends to set
> the bit on newly created entries to match the EPT behavior. It's a nop
> for x86 page tables.
> 
> Another benefit of reusing the code is having separate handling for
> upper-level PTEs vs 4K PTEs, which avoids some quirks like setting the
> large bit on a 4K PTE in the EPTs.
> 
> No functional change intended.
> 
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
> ---
>  .../selftests/kvm/include/x86/processor.h     |   3 +
>  .../testing/selftests/kvm/lib/x86/processor.c |  12 +-
>  tools/testing/selftests/kvm/lib/x86/vmx.c     | 115 ++++--------------
>  3 files changed, 33 insertions(+), 97 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
> index fb2b2e53d453..62e10b296719 100644
> --- a/tools/testing/selftests/kvm/include/x86/processor.h
> +++ b/tools/testing/selftests/kvm/include/x86/processor.h
> @@ -1447,6 +1447,7 @@ struct pte_masks {
>  	uint64_t dirty;
>  	uint64_t huge;
>  	uint64_t nx;
> +	uint64_t x;

To be consistent with e.g. writable, call this executable.

>  	uint64_t c;
>  	uint64_t s;
>  };
> @@ -1464,6 +1465,7 @@ struct kvm_mmu {
>  #define PTE_DIRTY_MASK(mmu) ((mmu)->pte_masks.dirty)
>  #define PTE_HUGE_MASK(mmu) ((mmu)->pte_masks.huge)
>  #define PTE_NX_MASK(mmu) ((mmu)->pte_masks.nx)
> +#define PTE_X_MASK(mmu) ((mmu)->pte_masks.x)
>  #define PTE_C_MASK(mmu) ((mmu)->pte_masks.c)
>  #define PTE_S_MASK(mmu) ((mmu)->pte_masks.s)
>  
> @@ -1474,6 +1476,7 @@ struct kvm_mmu {
>  #define pte_dirty(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
>  #define pte_huge(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
>  #define pte_nx(mmu, pte) (!!(*(pte) & PTE_NX_MASK(mmu)))
> +#define pte_x(mmu, pte) (!!(*(pte) & PTE_X_MASK(mmu)))

And then here to not assume PRESENT == READABLE, just check if the MMU even has
a PRESENT bit.  We may still need changes, e.g. the page table builders actually
need to verify a PTE is _writable_, not just present, but that's largely an
orthogonal issue.

#define is_present_pte(mmu, pte)		\
	(PTE_PRESENT_MASK(mmu) ?		\
	 !!(*(pte) & PTE_PRESENT_MASK(mmu)) :	\
	 !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))

And to properly capture the relationship between NX and EXECUTABLE:

#define is_executable_pte(mmu, pte)	\
	((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))

#define is_nx_pte(mmu, pte)		(!is_executable_pte(mmu, pte))
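
To illustrate (sketch only, for an mmu initialized with the legacy x86
masks, i.e. NX = BIT_ULL(63) and no EXECUTABLE mask):

	uint64_t pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu);

	/* The EXECUTABLE mask is 0, so the check reduces to "NX clear". */
	TEST_ASSERT(is_executable_pte(mmu, &pte), "NX clear => executable");

	pte |= PTE_NX_MASK(mmu);
	TEST_ASSERT(is_nx_pte(mmu, &pte), "NX set => not executable");

For an EPT mmu it's the inverse: NX is 0 and EXECUTABLE is the X bit, so
the same check reduces to "X set".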

>  #define pte_c(mmu, pte) (!!(*(pte) & PTE_C_MASK(mmu)))
>  #define pte_s(mmu, pte) (!!(*(pte) & PTE_S_MASK(mmu)))
>  
> diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
> index bff75ff05364..8b0e17f8ca37 100644
> --- a/tools/testing/selftests/kvm/lib/x86/processor.c
> +++ b/tools/testing/selftests/kvm/lib/x86/processor.c
> @@ -162,8 +162,7 @@ struct kvm_mmu *mmu_create(struct kvm_vm *vm, int pgtable_levels,
>  	struct kvm_mmu *mmu = calloc(1, sizeof(*mmu));
>  
>  	TEST_ASSERT(mmu, "-ENOMEM when allocating MMU");
> -	if (pte_masks)
> -		mmu->pte_masks = *pte_masks;
> +	mmu->pte_masks = *pte_masks;

Rather than pass NULL (and allow NULL here) in the previous patch, pass an
empty pte_masks.  That avoids churning the MMU initialization code, and allows
for a better TODO in the previous patch.
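
E.g. (sketch; the exact call site lives in the previous patch):

	mmu = mmu_create(vm, pgtable_levels, &(struct pte_masks){});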

> +	/*
> +	 * EPTs do not have 'present' or 'user' bits, instead bit 0 is the
> +	 * 'readable' bit. In some cases, EPTs can be execute-only and an entry
> +	 * is present but not readable. However, for the purposes of testing we
> +	 * assume 'present' == 'user' == 'readable' for simplicity.
> +	 */
> +	pte_masks = (struct pte_masks){
> +		.present	=	BIT_ULL(0),
> +		.user		=	BIT_ULL(0),
> +		.writable	=	BIT_ULL(1),
> +		.x		=	BIT_ULL(2),
> +		.accessed	=	BIT_ULL(5),
> +		.dirty		=	BIT_ULL(6),
> +		.huge		=	BIT_ULL(7),
> +		.nx		=	0,
> +	};
> +
>  	/* EPTP_PWL_4 is always used */

Make this a TODO, e.g.

	/* TODO: Add support for 5-level paging. */

so that it's clear this is a shortcoming, not some fundamental property of
selftests.
Re: [PATCH v3 10/16] KVM: selftests: Reuse virt mapping functions for nested EPTs
Posted by Yosry Ahmed 1 month, 2 weeks ago
On Tue, Dec 23, 2025 at 03:12:09PM -0800, Sean Christopherson wrote:
> On Thu, Nov 27, 2025, Yosry Ahmed wrote:
> > __tdp_pg_map() bears a lot of resemblance to __virt_pg_map(). The
> > main differences are:
> > - It uses the EPT struct overlay instead of the PTE masks.
> > - It always assumes 4-level EPTs.
> > 
> > To reuse __virt_pg_map(), initialize the PTE masks in nested MMU with
> > EPT PTE masks. EPTs have no 'present' or 'user' bits, so use the
> > 'readable' bit instead like shadow_{present/user}_mask, ignoring the
> > fact that entries can be present and not readable if the CPU has
> > VMX_EPT_EXECUTE_ONLY_BIT.  This is simple and sufficient for testing.
> 
> Ugh, no.  I am strongly against playing the same insane games KVM itself plays
> with overloading protection/access bits.  There's no reason for selftests to do
> the same, e.g. selftests aren't shadowing guest PTEs and doing permission checks
> in hot paths and so don't need to multiplex a bunch of things into an inscrutable
> (but performant!) system.

I was trying to stay consistent with the KVM code (rather than care
about performance), but if you'd rather simplify things here then I am
fine with that too.

> 
> > Add an executable bitmask and update __virt_pg_map() and friends to set
> > the bit on newly created entries to match the EPT behavior. It's a nop
> > for x86 page tables.
> > 
> > Another benefit of reusing the code is having separate handling for
> > upper-level PTEs vs 4K PTEs, which avoids some quirks like setting the
> > large bit on a 4K PTE in the EPTs.
> > 
> > No functional change intended.
> > 
> > Suggested-by: Sean Christopherson <seanjc@google.com>
> > Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
> > ---
> >  .../selftests/kvm/include/x86/processor.h     |   3 +
> >  .../testing/selftests/kvm/lib/x86/processor.c |  12 +-
> >  tools/testing/selftests/kvm/lib/x86/vmx.c     | 115 ++++--------------
> >  3 files changed, 33 insertions(+), 97 deletions(-)
> > 
> > diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
> > index fb2b2e53d453..62e10b296719 100644
> > --- a/tools/testing/selftests/kvm/include/x86/processor.h
> > +++ b/tools/testing/selftests/kvm/include/x86/processor.h
> > @@ -1447,6 +1447,7 @@ struct pte_masks {
> >  	uint64_t dirty;
> >  	uint64_t huge;
> >  	uint64_t nx;
> > +	uint64_t x;
> 
> To be consistent with e.g. writable, call this executable.

Was trying to be consistent with 'nx' :) 

> 
> >  	uint64_t c;
> >  	uint64_t s;
> >  };
> > @@ -1464,6 +1465,7 @@ struct kvm_mmu {
> >  #define PTE_DIRTY_MASK(mmu) ((mmu)->pte_masks.dirty)
> >  #define PTE_HUGE_MASK(mmu) ((mmu)->pte_masks.huge)
> >  #define PTE_NX_MASK(mmu) ((mmu)->pte_masks.nx)
> > +#define PTE_X_MASK(mmu) ((mmu)->pte_masks.x)
> >  #define PTE_C_MASK(mmu) ((mmu)->pte_masks.c)
> >  #define PTE_S_MASK(mmu) ((mmu)->pte_masks.s)
> >  
> > @@ -1474,6 +1476,7 @@ struct kvm_mmu {
> >  #define pte_dirty(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
> >  #define pte_huge(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
> >  #define pte_nx(mmu, pte) (!!(*(pte) & PTE_NX_MASK(mmu)))
> > +#define pte_x(mmu, pte) (!!(*(pte) & PTE_X_MASK(mmu)))
> 
> And then here to not assume PRESENT == READABLE, just check if the MMU even has
> a PRESENT bit.  We may still need changes, e.g. the page table builders actually
> need to verify a PTE is _writable_, not just present, but that's largely an
> orthogonal issue.

Not sure what you mean? How is the PTE being writable relevant to
assuming PRESENT == READABLE?

> 
> #define is_present_pte(mmu, pte)		\
> 	(PTE_PRESENT_MASK(mmu) ?		\
> 	 !!(*(pte) & PTE_PRESENT_MASK(mmu)) :	\
> 	 !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))

and then Intel will introduce VMX_EPT_WRITE_ONLY_BIT :P

> 
> And to properly capture the relationship between NX and EXECUTABLE:
> 
> #define is_executable_pte(mmu, pte)	\
> 	((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))

Yeah that's a lot better.

> 
> #define is_nx_pte(mmu, pte)		(!is_executable_pte(mmu, pte))
> 
> >  #define pte_c(mmu, pte) (!!(*(pte) & PTE_C_MASK(mmu)))
> >  #define pte_s(mmu, pte) (!!(*(pte) & PTE_S_MASK(mmu)))
> >  
> > diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
> > index bff75ff05364..8b0e17f8ca37 100644
> > --- a/tools/testing/selftests/kvm/lib/x86/processor.c
> > +++ b/tools/testing/selftests/kvm/lib/x86/processor.c
> > @@ -162,8 +162,7 @@ struct kvm_mmu *mmu_create(struct kvm_vm *vm, int pgtable_levels,
> >  	struct kvm_mmu *mmu = calloc(1, sizeof(*mmu));
> >  
> >  	TEST_ASSERT(mmu, "-ENOMEM when allocating MMU");
> > -	if (pte_masks)
> > -		mmu->pte_masks = *pte_masks;
> > +	mmu->pte_masks = *pte_masks;
> 
> Rather than pass NULL (and allow NULL here) in the previous patch, pass an
> empty pte_masks.  That avoids churning the MMU initialization code, and allows
> for a better TODO in the previous patch.

Makes sense.

> 
> > +	/*
> > +	 * EPTs do not have 'present' or 'user' bits, instead bit 0 is the
> > +	 * 'readable' bit. In some cases, EPTs can be execute-only and an entry
> > +	 * is present but not readable. However, for the purposes of testing we
> > +	 * assume 'present' == 'user' == 'readable' for simplicity.
> > +	 */
> > +	pte_masks = (struct pte_masks){
> > +		.present	=	BIT_ULL(0),
> > +		.user		=	BIT_ULL(0),
> > +		.writable	=	BIT_ULL(1),
> > +		.x		=	BIT_ULL(2),
> > +		.accessed	=	BIT_ULL(5),
> > +		.dirty		=	BIT_ULL(6),
> > +		.huge		=	BIT_ULL(7),
> > +		.nx		=	0,
> > +	};
> > +
> >  	/* EPTP_PWL_4 is always used */
> 
> Make this a TODO, e.g.
> 
> 	/* TODO: Add support for 5-level paging. */
> 
> so that it's clear this is a shortcoming, not some fundamental property of
> selftests.

Makes sense.
Re: [PATCH v3 10/16] KVM: selftests: Reuse virt mapping functions for nested EPTs
Posted by Sean Christopherson 1 month, 1 week ago
On Tue, Dec 23, 2025, Yosry Ahmed wrote:
> On Tue, Dec 23, 2025 at 03:12:09PM -0800, Sean Christopherson wrote:
> > On Thu, Nov 27, 2025, Yosry Ahmed wrote:
> > > diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
> > > index fb2b2e53d453..62e10b296719 100644
> > > --- a/tools/testing/selftests/kvm/include/x86/processor.h
> > > +++ b/tools/testing/selftests/kvm/include/x86/processor.h
> > > @@ -1447,6 +1447,7 @@ struct pte_masks {
> > >  	uint64_t dirty;
> > >  	uint64_t huge;
> > >  	uint64_t nx;
> > > +	uint64_t x;
> > 
> > To be consistent with e.g. writable, call this executable.
> 
> Was trying to be consistent with 'nx' :) 
> 
> > 
> > >  	uint64_t c;
> > >  	uint64_t s;
> > >  };
> > > @@ -1464,6 +1465,7 @@ struct kvm_mmu {
> > >  #define PTE_DIRTY_MASK(mmu) ((mmu)->pte_masks.dirty)
> > >  #define PTE_HUGE_MASK(mmu) ((mmu)->pte_masks.huge)
> > >  #define PTE_NX_MASK(mmu) ((mmu)->pte_masks.nx)
> > > +#define PTE_X_MASK(mmu) ((mmu)->pte_masks.x)
> > >  #define PTE_C_MASK(mmu) ((mmu)->pte_masks.c)
> > >  #define PTE_S_MASK(mmu) ((mmu)->pte_masks.s)
> > >  
> > > @@ -1474,6 +1476,7 @@ struct kvm_mmu {
> > >  #define pte_dirty(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
> > >  #define pte_huge(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
> > >  #define pte_nx(mmu, pte) (!!(*(pte) & PTE_NX_MASK(mmu)))
> > > +#define pte_x(mmu, pte) (!!(*(pte) & PTE_X_MASK(mmu)))
> > 
> > And then here to not assume PRESENT == READABLE, just check if the MMU even has
> > a PRESENT bit.  We may still need changes, e.g. the page table builders actually
> > need to verify a PTE is _writable_, not just present, but that's largely an
> > orthogonal issue.
> 
> Not sure what you mean? How is the PTE being writable relevant to
> assuming PRESENT == READABLE?

Only tangentially, I was trying to say that if we ever get to a point where selftests
support read-only mappings, then the below check won't suffice because walking
page tables would get false positives on whether or not an entry is usable, e.g.
if a test wants to create a writable mapping and ends up re-using a read-only
mapping.

The PRESENT == READABLE thing is much more about execute-only mappings (which
selftests also don't support, but as you allude to below, don't require new
hardware functionality).

> > #define is_present_pte(mmu, pte)		\
> > 	(PTE_PRESENT_MASK(mmu) ?		\
> > 	 !!(*(pte) & PTE_PRESENT_MASK(mmu)) :	\
> > 	 !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
> 
> and then Intel will introduce VMX_EPT_WRITE_ONLY_BIT :P
Re: [PATCH v3 10/16] KVM: selftests: Reuse virt mapping functions for nested EPTs
Posted by Yosry Ahmed 1 month, 1 week ago
December 29, 2025 at 4:08 PM, "Sean Christopherson" <seanjc@google.com> wrote:

> On Tue, Dec 23, 2025, Yosry Ahmed wrote:
> > On Tue, Dec 23, 2025 at 03:12:09PM -0800, Sean Christopherson wrote:
> > > On Thu, Nov 27, 2025, Yosry Ahmed wrote:
> > > > diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
> > > > index fb2b2e53d453..62e10b296719 100644
> > > > --- a/tools/testing/selftests/kvm/include/x86/processor.h
> > > > +++ b/tools/testing/selftests/kvm/include/x86/processor.h
> > > > @@ -1447,6 +1447,7 @@ struct pte_masks {
> > > >  	uint64_t dirty;
> > > >  	uint64_t huge;
> > > >  	uint64_t nx;
> > > > +	uint64_t x;
> > > 
> > > To be consistent with e.g. writable, call this executable.
> > 
> > Was trying to be consistent with 'nx' :)
> > 
> > > 
> > > >  	uint64_t c;
> > > >  	uint64_t s;
> > > >  };
> > > > @@ -1464,6 +1465,7 @@ struct kvm_mmu {
> > > >  #define PTE_DIRTY_MASK(mmu) ((mmu)->pte_masks.dirty)
> > > >  #define PTE_HUGE_MASK(mmu) ((mmu)->pte_masks.huge)
> > > >  #define PTE_NX_MASK(mmu) ((mmu)->pte_masks.nx)
> > > > +#define PTE_X_MASK(mmu) ((mmu)->pte_masks.x)
> > > >  #define PTE_C_MASK(mmu) ((mmu)->pte_masks.c)
> > > >  #define PTE_S_MASK(mmu) ((mmu)->pte_masks.s)
> > > >  
> > > > @@ -1474,6 +1476,7 @@ struct kvm_mmu {
> > > >  #define pte_dirty(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
> > > >  #define pte_huge(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
> > > >  #define pte_nx(mmu, pte) (!!(*(pte) & PTE_NX_MASK(mmu)))
> > > > +#define pte_x(mmu, pte) (!!(*(pte) & PTE_X_MASK(mmu)))
> > > 
> > > And then here to not assume PRESENT == READABLE, just check if the MMU even has
> > > a PRESENT bit.  We may still need changes, e.g. the page table builders actually
> > > need to verify a PTE is _writable_, not just present, but that's largely an
> > > orthogonal issue.
> > 
> > Not sure what you mean? How is the PTE being writable relevant to
> > assuming PRESENT == READABLE?
> 
> Only tangentially, I was trying to say that if we ever get to a point where selftests
> support read-only mappings, then the below check won't suffice because walking
> page tables would get false positives on whether or not an entry is usable, e.g.
> if a test wants to create a writable mapping and ends up re-using a read-only
> mapping.
> 
> The PRESENT == READABLE thing is much more about execute-only mappings (which
> selftests also don't support, but as you allude to below, don't require new
> hardware functionality).

Oh okay, thanks for clarifying. Yeah that makes sense, if/when read-only
mappings are ever supported the page table builders will need to be updated
accordingly.

Although now that you point this out, I think it would be easy to miss. If
new helpers are introduced that just modify existing page tables to remove
the write bit, then we'll probably miss updating the page table builders to
check for writable mappings. Then again, we'll probably only update the leaf
PTEs to be read-only, and the page table builders already do not re-use leaf
entries.

We could be paranoid and add some TEST_ASSERT() calls to guard against that
(e.g. in virt_create_upper_pte()), but probably not worth it.
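
E.g. something like this (hypothetical, assuming a pte_writable() accessor
in the same style as pte_present()):

	/* Don't silently hang a new mapping off a read-only upper-level PTE. */
	TEST_ASSERT(!pte_present(mmu, pte) || pte_writable(mmu, pte),
		    "Cannot reuse a read-only upper-level PTE");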

> 
> > > #define is_present_pte(mmu, pte)		\
> > > 	(PTE_PRESENT_MASK(mmu) ?		\
> > > 	 !!(*(pte) & PTE_PRESENT_MASK(mmu)) :	\
> > > 	 !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
> > 
> > and then Intel will introduce VMX_EPT_WRITE_ONLY_BIT :P
Re: [PATCH v3 10/16] KVM: selftests: Reuse virt mapping functions for nested EPTs
Posted by Sean Christopherson 1 month, 1 week ago
On Tue, Dec 30, 2025, Yosry Ahmed wrote:
> December 29, 2025 at 4:08 PM, "Sean Christopherson" <seanjc@google.com> wrote:
> 
> > On Tue, Dec 23, 2025, Yosry Ahmed wrote:
> > > On Tue, Dec 23, 2025 at 03:12:09PM -0800, Sean Christopherson wrote:
> > > > On Thu, Nov 27, 2025, Yosry Ahmed wrote:
> > > > > diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
> > > > > index fb2b2e53d453..62e10b296719 100644
> > > > > --- a/tools/testing/selftests/kvm/include/x86/processor.h
> > > > > +++ b/tools/testing/selftests/kvm/include/x86/processor.h
> > > > > @@ -1447,6 +1447,7 @@ struct pte_masks {
> > > > >  	uint64_t dirty;
> > > > >  	uint64_t huge;
> > > > >  	uint64_t nx;
> > > > > +	uint64_t x;
> > > > 
> > > > To be consistent with e.g. writable, call this executable.
> > > 
> > > Was trying to be consistent with 'nx' :)
> > > 
> > > > 
> > > > >  	uint64_t c;
> > > > >  	uint64_t s;
> > > > >  };
> > > > > @@ -1464,6 +1465,7 @@ struct kvm_mmu {
> > > > >  #define PTE_DIRTY_MASK(mmu) ((mmu)->pte_masks.dirty)
> > > > >  #define PTE_HUGE_MASK(mmu) ((mmu)->pte_masks.huge)
> > > > >  #define PTE_NX_MASK(mmu) ((mmu)->pte_masks.nx)
> > > > > +#define PTE_X_MASK(mmu) ((mmu)->pte_masks.x)
> > > > >  #define PTE_C_MASK(mmu) ((mmu)->pte_masks.c)
> > > > >  #define PTE_S_MASK(mmu) ((mmu)->pte_masks.s)
> > > > >  
> > > > > @@ -1474,6 +1476,7 @@ struct kvm_mmu {
> > > > >  #define pte_dirty(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
> > > > >  #define pte_huge(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
> > > > >  #define pte_nx(mmu, pte) (!!(*(pte) & PTE_NX_MASK(mmu)))
> > > > > +#define pte_x(mmu, pte) (!!(*(pte) & PTE_X_MASK(mmu)))
> > > > 
> > > > And then here to not assume PRESENT == READABLE, just check if the MMU even has
> > > > a PRESENT bit.  We may still need changes, e.g. the page table builders actually
> > > > need to verify a PTE is _writable_, not just present, but that's largely an
> > > > orthogonal issue.
> > > 
> > > Not sure what you mean? How is the PTE being writable relevant to
> > > assuming PRESENT == READABLE?
> > 
> > Only tangentially, I was trying to say that if we ever get to a point where selftests
> > support read-only mappings, then the below check won't suffice because walking
> > page tables would get false positives on whether or not an entry is usable, e.g.
> > if a test wants to create a writable mapping and ends up re-using a read-only
> > mapping.
> > 
> > The PRESENT == READABLE thing is much more about execute-only mappings (which
> > selftests also don't support, but as you allude to below, don't require new
> > hardware functionality).
> 
> Oh okay, thanks for clarifying. Yeah that makes sense, if/when read-only
> mappings are ever supported the page table builders will need to be updated
> accordingly.
> 
> Although now that you point this out, I think it would be easy to miss. If
> new helpers are introduced that just modify existing page tables to remove
> the write bit, then we'll probably miss updating the page table builders to
> check for writable mappings. Then again, we'll probably only update the leaf
> PTEs to be read-only, and the page table builders already do not re-use leaf
> entries.

Yep, unless we rewrite the KUT access test :-)

> We could be paranoid and add some TEST_ASSERT() calls to guard against that
> (e.g. in virt_create_upper_pte()), but probably not worth it.

Agreed.