[PATCH 08/22] KVM: x86/mmu: introduce ACC_READ_MASK

Paolo Bonzini posted 22 patches 2 weeks ago
There is a newer version of this series
[PATCH 08/22] KVM: x86/mmu: introduce ACC_READ_MASK
Posted by Paolo Bonzini 2 weeks ago
Read permissions so far were only needed for EPT, which does not need
ACC_USER_MASK.  Therefore, for EPT page tables ACC_USER_MASK was repurposed
as a read permission bit.

In order to implement nested MBEC, EPT will genuinely have four kinds of
accesses, and there will be no room for such hacks; bite the bullet at
last, enlarging ACC_ALL to four bits and permissions[] to 2^4 bits (u16).

The new code does not enforce that the XWR bits on non-execonly processors
have their R bit set, even when running nested: none of the shadow_*_mask
values have bit 0 set, and make_spte() genuinely relies on ACC_READ_MASK
being requested!  This works because, if execonly is not supported by the
processor, shadow EPT will generate an EPT misconfig vmexit if the XWR
bits represent a non-readable page, and therefore the pte_access argument
to make_spte() will also always have ACC_READ_MASK set.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 12 +++++-----
 arch/x86/kvm/mmu.h              |  2 +-
 arch/x86/kvm/mmu/mmu.c          | 39 +++++++++++++++++++++------------
 arch/x86/kvm/mmu/mmutrace.h     |  3 ++-
 arch/x86/kvm/mmu/paging_tmpl.h  | 21 +++++++++---------
 arch/x86/kvm/mmu/spte.c         | 18 ++++++---------
 arch/x86/kvm/mmu/spte.h         |  5 +++--
 arch/x86/kvm/vmx/capabilities.h |  5 -----
 arch/x86/kvm/vmx/common.h       |  5 +----
 arch/x86/kvm/vmx/vmx.c          |  3 +--
 10 files changed, 56 insertions(+), 57 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 871c7ff4fb29..3efb238c683c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -317,11 +317,11 @@ struct kvm_kernel_irq_routing_entry;
  * the number of unique SPs that can theoretically be created is 2^n, where n
  * is the number of bits that are used to compute the role.
  *
- * But, even though there are 20 bits in the mask below, not all combinations
+ * But, even though there are 21 bits in the mask below, not all combinations
  * of modes and flags are possible:
  *
  *   - invalid shadow pages are not accounted, mirror pages are not shadowed,
- *     so the bits are effectively 18.
+ *     so the bits are effectively 19.
  *
  *   - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
  *     execonly and ad_disabled are only used for nested EPT which has
@@ -336,7 +336,7 @@ struct kvm_kernel_irq_routing_entry;
  *     cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
  *
  * Therefore, the maximum number of possible upper-level shadow pages for a
- * single gfn is a bit less than 2^13.
+ * single gfn is a bit less than 2^14.
  */
 union kvm_mmu_page_role {
 	u32 word;
@@ -345,7 +345,7 @@ union kvm_mmu_page_role {
 		unsigned has_4_byte_gpte:1;
 		unsigned quadrant:2;
 		unsigned direct:1;
-		unsigned access:3;
+		unsigned access:4;
 		unsigned invalid:1;
 		unsigned efer_nx:1;
 		unsigned cr0_wp:1;
@@ -355,7 +355,7 @@ union kvm_mmu_page_role {
 		unsigned guest_mode:1;
 		unsigned passthrough:1;
 		unsigned is_mirror:1;
-		unsigned :4;
+		unsigned :3;
 
 		/*
 		 * This is left at the top of the word so that
@@ -481,7 +481,7 @@ struct kvm_mmu {
 	 * Byte index: page fault error code [4:1]
 	 * Bit index: pte permissions in ACC_* format
 	 */
-	u8 permissions[16];
+	u16 permissions[16];
 
 	u64 *pae_root;
 	u64 *pml4_root;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b4b6860ab971..f5d35f66750b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -81,7 +81,7 @@ u8 kvm_mmu_get_max_tdp_level(void);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
 void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value);
 void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
-void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
+void kvm_mmu_set_ept_masks(bool has_ad_bits);
 
 void kvm_init_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 84351df8a9cb..b87dbf9e42b9 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2029,7 +2029,7 @@ static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	 */
 	const union kvm_mmu_page_role sync_role_ign = {
 		.level = 0xf,
-		.access = 0x7,
+		.access = ACC_ALL,
 		.quadrant = 0x3,
 		.passthrough = 0x1,
 	};
@@ -5426,7 +5426,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly)
  * update_permission_bitmask() builds what is effectively a
  * two-dimensional array of bools.  The second dimension is
  * provided by individual bits of permissions[pfec >> 1], and
- * logical &, | and ~ operations operate on all the 8 possible
+ * logical &, | and ~ operations operate on all the 16 possible
  * combinations of ACC_* bits.
  */
 #define ACC_BITS_MASK(access) \
@@ -5436,15 +5436,24 @@ reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly)
 	 (4 & (access) ? 1 << 4 : 0) | \
 	 (5 & (access) ? 1 << 5 : 0) | \
 	 (6 & (access) ? 1 << 6 : 0) | \
-	 (7 & (access) ? 1 << 7 : 0))
+	 (7 & (access) ? 1 << 7 : 0) | \
+	 (8 & (access) ? 1 << 8 : 0) | \
+	 (9 & (access) ? 1 << 9 : 0) | \
+	 (10 & (access) ? 1 << 10 : 0) | \
+	 (11 & (access) ? 1 << 11 : 0) | \
+	 (12 & (access) ? 1 << 12 : 0) | \
+	 (13 & (access) ? 1 << 13 : 0) | \
+	 (14 & (access) ? 1 << 14 : 0) | \
+	 (15 & (access) ? 1 << 15 : 0))
 
 static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 {
 	unsigned byte;
 
-	const u8 x = ACC_BITS_MASK(ACC_EXEC_MASK);
-	const u8 w = ACC_BITS_MASK(ACC_WRITE_MASK);
-	const u8 u = ACC_BITS_MASK(ACC_USER_MASK);
+	const u16 x = ACC_BITS_MASK(ACC_EXEC_MASK);
+	const u16 w = ACC_BITS_MASK(ACC_WRITE_MASK);
+	const u16 u = ACC_BITS_MASK(ACC_USER_MASK);
+	const u16 r = ACC_BITS_MASK(ACC_READ_MASK);
 
 	bool cr4_smep = is_cr4_smep(mmu);
 	bool cr4_smap = is_cr4_smap(mmu);
@@ -5467,24 +5476,26 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 		unsigned pfec = byte << 1;
 
 		/*
-		 * Each "*f" variable has a 1 bit for each UWX value
+		 * Each "*f" variable has a 1 bit for each ACC_* combo
 		 * that causes a fault with the given PFEC.
 		 */
 
+		/* Faults from reads to non-readable pages */
+		u16 rf = (pfec & (PFERR_WRITE_MASK|PFERR_FETCH_MASK)) ? 0 : (u16)~r;
 		/* Faults from writes to non-writable pages */
-		u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0;
+		u16 wf = (pfec & PFERR_WRITE_MASK) ? (u16)~w : 0;
 		/* Faults from user mode accesses to supervisor pages */
-		u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
+		u16 uf = (pfec & PFERR_USER_MASK) ? (u16)~u : 0;
 		/* Faults from fetches of non-executable pages*/
-		u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
+		u16 ff = (pfec & PFERR_FETCH_MASK) ? (u16)~x : 0;
 		/* Faults from kernel mode fetches of user pages */
-		u8 smepf = 0;
+		u16 smepf = 0;
 		/* Faults from kernel mode accesses of user pages */
-		u8 smapf = 0;
+		u16 smapf = 0;
 
 		if (!ept) {
 			/* Faults from kernel mode accesses to user pages */
-			u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
+			u16 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
 
 			/* Not really needed: !nx will cause pte.nx to fault */
 			if (!efer_nx)
@@ -5517,7 +5528,7 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 				smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf;
 		}
 
-		mmu->permissions[byte] = ff | uf | wf | smepf | smapf;
+		mmu->permissions[byte] = ff | uf | wf | rf | smepf | smapf;
 	}
 }
 
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index f35a830ce469..44545f6f860a 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -25,7 +25,8 @@
 #define KVM_MMU_PAGE_PRINTK() ({				        \
 	const char *saved_ptr = trace_seq_buffer_ptr(p);		\
 	static const char *access_str[] = {			        \
-		"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"  \
+		"----", "r---", "-w--", "rw--", "--u-", "r-u-", "-wu-", "rwu-", \
+		"---x", "r--x", "-w-x", "rw-x", "--ux", "r-ux", "-wux", "rwux"	\
 	};							        \
 	union kvm_mmu_page_role role;				        \
 								        \
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index ed762bb4b007..bbdbf4ae2d65 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -170,25 +170,24 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-/*
- * For PTTYPE_EPT, a page table can be executable but not readable
- * on supported processors. Therefore, set_spte does not automatically
- * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
- * to signify readability since it isn't used in the EPT case
- */
 static inline unsigned FNAME(gpte_access)(u64 gpte)
 {
 	unsigned access;
 #if PTTYPE == PTTYPE_EPT
 	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
 		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
-		((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
+		((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0);
 #else
-	BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
-	BUILD_BUG_ON(ACC_EXEC_MASK != 1);
+	/*
+	 * P is set here, so the page is always readable and W/U/!NX represent
+	 * allowed accesses.
+	 */
+	BUILD_BUG_ON(ACC_READ_MASK != PT_PRESENT_MASK);
+	BUILD_BUG_ON(ACC_WRITE_MASK != PT_WRITABLE_MASK);
+	BUILD_BUG_ON(ACC_USER_MASK != PT_USER_MASK);
+	BUILD_BUG_ON(ACC_EXEC_MASK & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK));
 	access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
-	/* Combine NX with P (which is set here) to get ACC_EXEC_MASK.  */
-	access ^= (gpte >> PT64_NX_SHIFT);
+	access |= gpte & PT64_NX_MASK ? 0 : ACC_EXEC_MASK;
 #endif
 
 	return access;
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index e2acd9ed9dba..0b09124b0d54 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -194,12 +194,6 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	int is_host_mmio = -1;
 	bool wrprot = false;
 
-	/*
-	 * For the EPT case, shadow_present_mask has no RWX bits set if
-	 * exec-only page table entries are supported.  In that case,
-	 * ACC_USER_MASK and shadow_user_mask are used to represent
-	 * read access.  See FNAME(gpte_access) in paging_tmpl.h.
-	 */
 	WARN_ON_ONCE((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE);
 
 	if (sp->role.ad_disabled)
@@ -228,6 +222,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		pte_access &= ~ACC_EXEC_MASK;
 	}
 
+	if (pte_access & ACC_READ_MASK)
+		spte |= PT_PRESENT_MASK; /* or VMX_EPT_READABLE_MASK */
+
 	if (pte_access & ACC_EXEC_MASK)
 		spte |= shadow_x_mask;
 	else
@@ -390,6 +387,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
 	u64 spte = SPTE_MMU_PRESENT_MASK;
 
 	spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
+		PT_PRESENT_MASK /* or VMX_EPT_READABLE_MASK */ |
 		shadow_user_mask | shadow_x_mask | shadow_me_value;
 
 	if (ad_disabled)
@@ -490,18 +488,16 @@ void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_me_spte_mask);
 
-void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
+void kvm_mmu_set_ept_masks(bool has_ad_bits)
 {
 	kvm_ad_enabled		= has_ad_bits;
 
-	shadow_user_mask	= VMX_EPT_READABLE_MASK;
+	shadow_user_mask	= 0;
 	shadow_accessed_mask	= VMX_EPT_ACCESS_BIT;
 	shadow_dirty_mask	= VMX_EPT_DIRTY_BIT;
 	shadow_nx_mask		= 0ull;
 	shadow_x_mask		= VMX_EPT_EXECUTABLE_MASK;
-	/* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */
-	shadow_present_mask	=
-		(has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT;
+	shadow_present_mask	= VMX_EPT_SUPPRESS_VE_BIT;
 
 	shadow_acc_track_mask	= VMX_EPT_RWX_MASK;
 	shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 3d77755b6b10..0c305f2f4ba0 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -52,10 +52,11 @@ static_assert(SPTE_TDP_AD_ENABLED == 0);
 #define SPTE_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
 #endif
 
-#define ACC_EXEC_MASK    1
+#define ACC_READ_MASK    PT_PRESENT_MASK
 #define ACC_WRITE_MASK   PT_WRITABLE_MASK
 #define ACC_USER_MASK    PT_USER_MASK
-#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+#define ACC_EXEC_MASK    8
+#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
 
 #define SPTE_LEVEL_BITS			9
 #define SPTE_LEVEL_SHIFT(level)		__PT_LEVEL_SHIFT(level, SPTE_LEVEL_BITS)
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 5316c27f6099..3bda6a621d8a 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -288,11 +288,6 @@ static inline bool cpu_has_vmx_flexpriority(void)
 		cpu_has_vmx_virtualize_apic_accesses();
 }
 
-static inline bool cpu_has_vmx_ept_execute_only(void)
-{
-	return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
-}
-
 static inline bool cpu_has_vmx_ept_4levels(void)
 {
 	return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
index adf925500b9e..1afbf272efae 100644
--- a/arch/x86/kvm/vmx/common.h
+++ b/arch/x86/kvm/vmx/common.h
@@ -85,11 +85,8 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
 {
 	u64 error_code;
 
-	/* Is it a read fault? */
-	error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
-		     ? PFERR_USER_MASK : 0;
 	/* Is it a write fault? */
-	error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
+	error_code = (exit_qualification & EPT_VIOLATION_ACC_WRITE)
 		      ? PFERR_WRITE_MASK : 0;
 	/* Is it a fetch fault? */
 	error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 2e687761aeaf..98801c408b8c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8425,8 +8425,7 @@ __init int vmx_hardware_setup(void)
 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
 	if (enable_ept)
-		kvm_mmu_set_ept_masks(enable_ept_ad_bits,
-				      cpu_has_vmx_ept_execute_only());
+		kvm_mmu_set_ept_masks(enable_ept_ad_bits);
 	else
 		vt_x86_ops.get_mt_mask = NULL;
 
-- 
2.52.0
Re: [PATCH 08/22] KVM: x86/mmu: introduce ACC_READ_MASK
Posted by Jon Kohler 1 week, 4 days ago

> On Mar 20, 2026, at 8:09 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> Read permissions so far were only needed for EPT, which does not need
> ACC_USER_MASK.  Therefore, for EPT page tables ACC_USER_MASK was repurposed
> as a read permission bit.
> 
> In order to implement nested MBEC, EPT will genuinely have four kinds of
> accesses, and there will be no room for such hacks; bite the bullet at
> last, enlarging ACC_ALL to four bits and permissions[] to 2^4 bits (u16).
> 
> The new code does not enforce that the XWR bits on non-execonly processors
> have their R bit set, even when running nested: none of the shadow_*_mask
> values have bit 0 set, and make_spte() genuinely relies on ACC_READ_MASK
> being requested!  This works becase, if execonly is not supported by the
> processor, shadow EPT will generate an EPT misconfig vmexit if the XWR
> bits represent a non-readable page, and therefore the pte_access argument
> to make_spte() will also always have ACC_READ_MASK set.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> arch/x86/include/asm/kvm_host.h | 12 +++++-----
> arch/x86/kvm/mmu.h              |  2 +-
> arch/x86/kvm/mmu/mmu.c          | 39 +++++++++++++++++++++------------
> arch/x86/kvm/mmu/mmutrace.h     |  3 ++-
> arch/x86/kvm/mmu/paging_tmpl.h  | 21 +++++++++---------
> arch/x86/kvm/mmu/spte.c         | 18 ++++++---------
> arch/x86/kvm/mmu/spte.h         |  5 +++--
> arch/x86/kvm/vmx/capabilities.h |  5 -----
> arch/x86/kvm/vmx/common.h       |  5 +----
> arch/x86/kvm/vmx/vmx.c          |  3 +--
> 10 files changed, 56 insertions(+), 57 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 871c7ff4fb29..3efb238c683c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -317,11 +317,11 @@ struct kvm_kernel_irq_routing_entry;
>  * the number of unique SPs that can theoretically be created is 2^n, where n
>  * is the number of bits that are used to compute the role.
>  *
> - * But, even though there are 20 bits in the mask below, not all combinations
> + * But, even though there are 21 bits in the mask below, not all combinations
>  * of modes and flags are possible:
>  *
>  *   - invalid shadow pages are not accounted, mirror pages are not shadowed,
> - *     so the bits are effectively 18.
> + *     so the bits are effectively 19.
>  *
>  *   - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
>  *     execonly and ad_disabled are only used for nested EPT which has
> @@ -336,7 +336,7 @@ struct kvm_kernel_irq_routing_entry;
>  *     cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
>  *
>  * Therefore, the maximum number of possible upper-level shadow pages for a
> - * single gfn is a bit less than 2^13.
> + * single gfn is a bit less than 2^14.
>  */
> union kvm_mmu_page_role {
> u32 word;
> @@ -345,7 +345,7 @@ union kvm_mmu_page_role {
> unsigned has_4_byte_gpte:1;
> unsigned quadrant:2;
> unsigned direct:1;
> - unsigned access:3;
> + unsigned access:4;
> unsigned invalid:1;
> unsigned efer_nx:1;
> unsigned cr0_wp:1;
> @@ -355,7 +355,7 @@ union kvm_mmu_page_role {
> unsigned guest_mode:1;
> unsigned passthrough:1;
> unsigned is_mirror:1;
> - unsigned :4;
> + unsigned :3;
> 
> /*
> * This is left at the top of the word so that
> @@ -481,7 +481,7 @@ struct kvm_mmu {
> * Byte index: page fault error code [4:1]
> * Bit index: pte permissions in ACC_* format
> */
> - u8 permissions[16];
> + u16 permissions[16];
> 
> u64 *pae_root;
> u64 *pml4_root;
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index b4b6860ab971..f5d35f66750b 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -81,7 +81,7 @@ u8 kvm_mmu_get_max_tdp_level(void);
> void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
> void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value);
> void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
> -void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
> +void kvm_mmu_set_ept_masks(bool has_ad_bits);
> 
> void kvm_init_mmu(struct kvm_vcpu *vcpu);
> void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 84351df8a9cb..b87dbf9e42b9 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2029,7 +2029,7 @@ static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
> */
> const union kvm_mmu_page_role sync_role_ign = {
> .level = 0xf,
> - .access = 0x7,
> + .access = ACC_ALL,
> .quadrant = 0x3,
> .passthrough = 0x1,
> };
> @@ -5426,7 +5426,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly)
>  * update_permission_bitmask() builds what is effectively a
>  * two-dimensional array of bools.  The second dimension is
>  * provided by individual bits of permissions[pfec >> 1], and
> - * logical &, | and ~ operations operate on all the 8 possible
> + * logical &, | and ~ operations operate on all the 16 possible
>  * combinations of ACC_* bits.
>  */
> #define ACC_BITS_MASK(access) \
> @@ -5436,15 +5436,24 @@ reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly)
> (4 & (access) ? 1 << 4 : 0) | \
> (5 & (access) ? 1 << 5 : 0) | \
> (6 & (access) ? 1 << 6 : 0) | \
> - (7 & (access) ? 1 << 7 : 0))
> + (7 & (access) ? 1 << 7 : 0) | \
> + (8 & (access) ? 1 << 8 : 0) | \
> + (9 & (access) ? 1 << 9 : 0) | \
> + (10 & (access) ? 1 << 10 : 0) | \
> + (11 & (access) ? 1 << 11 : 0) | \
> + (12 & (access) ? 1 << 12 : 0) | \
> + (13 & (access) ? 1 << 13 : 0) | \
> + (14 & (access) ? 1 << 14 : 0) | \
> + (15 & (access) ? 1 << 15 : 0))
> 
> static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
> {
> unsigned byte;
> 
> - const u8 x = ACC_BITS_MASK(ACC_EXEC_MASK);
> - const u8 w = ACC_BITS_MASK(ACC_WRITE_MASK);
> - const u8 u = ACC_BITS_MASK(ACC_USER_MASK);
> + const u16 x = ACC_BITS_MASK(ACC_EXEC_MASK);
> + const u16 w = ACC_BITS_MASK(ACC_WRITE_MASK);
> + const u16 u = ACC_BITS_MASK(ACC_USER_MASK);
> + const u16 r = ACC_BITS_MASK(ACC_READ_MASK);
> 
> bool cr4_smep = is_cr4_smep(mmu);
> bool cr4_smap = is_cr4_smap(mmu);
> @@ -5467,24 +5476,26 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
> unsigned pfec = byte << 1;
> 
> /*
> - * Each "*f" variable has a 1 bit for each UWX value
> + * Each "*f" variable has a 1 bit for each ACC_* combo
> * that causes a fault with the given PFEC.
> */
> 
> + /* Faults from reads to non-readable pages */
> + u16 rf = (pfec & (PFERR_WRITE_MASK|PFERR_FETCH_MASK)) ? 0 : (u16)~r;
> /* Faults from writes to non-writable pages */
> - u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0;
> + u16 wf = (pfec & PFERR_WRITE_MASK) ? (u16)~w : 0;
> /* Faults from user mode accesses to supervisor pages */
> - u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
> + u16 uf = (pfec & PFERR_USER_MASK) ? (u16)~u : 0;
> /* Faults from fetches of non-executable pages*/
> - u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
> + u16 ff = (pfec & PFERR_FETCH_MASK) ? (u16)~x : 0;
> /* Faults from kernel mode fetches of user pages */
> - u8 smepf = 0;
> + u16 smepf = 0;
> /* Faults from kernel mode accesses of user pages */
> - u8 smapf = 0;
> + u16 smapf = 0;
> 
> if (!ept) {
> /* Faults from kernel mode accesses to user pages */
> - u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
> + u16 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
> 
> /* Not really needed: !nx will cause pte.nx to fault */
> if (!efer_nx)
> @@ -5517,7 +5528,7 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
> smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf;
> }
> 
> - mmu->permissions[byte] = ff | uf | wf | smepf | smapf;
> + mmu->permissions[byte] = ff | uf | wf | rf | smepf | smapf;
> }
> }
> 
> diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
> index f35a830ce469..44545f6f860a 100644
> --- a/arch/x86/kvm/mmu/mmutrace.h
> +++ b/arch/x86/kvm/mmu/mmutrace.h
> @@ -25,7 +25,8 @@
> #define KVM_MMU_PAGE_PRINTK() ({        \
> const char *saved_ptr = trace_seq_buffer_ptr(p); \
> static const char *access_str[] = {        \
> - "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"  \
> + "----", "r---", "-w--", "rw--", "--u-", "r-u-", "-wu-", "rwu-", \
> + "---x", "r--x", "-w-x", "rw-x", "--ux", "r-ux", "-wux", "rwux" \
> };        \
> union kvm_mmu_page_role role;        \
>        \
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index ed762bb4b007..bbdbf4ae2d65 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -170,25 +170,24 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
> return true;
> }
> 
> -/*
> - * For PTTYPE_EPT, a page table can be executable but not readable
> - * on supported processors. Therefore, set_spte does not automatically
> - * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
> - * to signify readability since it isn't used in the EPT case
> - */
> static inline unsigned FNAME(gpte_access)(u64 gpte)
> {
> unsigned access;
> #if PTTYPE == PTTYPE_EPT
> access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
> ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
> - ((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
> + ((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0);
> #else
> - BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
> - BUILD_BUG_ON(ACC_EXEC_MASK != 1);
> + /*
> + * P is set here, so the page is always readable and W/U/!NX represent
> + * allowed accesses.
> + */
> + BUILD_BUG_ON(ACC_READ_MASK != PT_PRESENT_MASK);
> + BUILD_BUG_ON(ACC_WRITE_MASK != PT_WRITABLE_MASK);
> + BUILD_BUG_ON(ACC_USER_MASK != PT_USER_MASK);
> + BUILD_BUG_ON(ACC_EXEC_MASK & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK));
> access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
> - /* Combine NX with P (which is set here) to get ACC_EXEC_MASK.  */
> - access ^= (gpte >> PT64_NX_SHIFT);
> + access |= gpte & PT64_NX_MASK ? 0 : ACC_EXEC_MASK;
> #endif
> 
> return access;
> diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
> index e2acd9ed9dba..0b09124b0d54 100644
> --- a/arch/x86/kvm/mmu/spte.c
> +++ b/arch/x86/kvm/mmu/spte.c
> @@ -194,12 +194,6 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> int is_host_mmio = -1;
> bool wrprot = false;
> 
> - /*
> - * For the EPT case, shadow_present_mask has no RWX bits set if
> - * exec-only page table entries are supported.  In that case,
> - * ACC_USER_MASK and shadow_user_mask are used to represent
> - * read access.  See FNAME(gpte_access) in paging_tmpl.h.
> - */
> WARN_ON_ONCE((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE);
> 
> if (sp->role.ad_disabled)
> @@ -228,6 +222,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> pte_access &= ~ACC_EXEC_MASK;
> }
> 
> + if (pte_access & ACC_READ_MASK)
> + spte |= PT_PRESENT_MASK; /* or VMX_EPT_READABLE_MASK */
> +
> if (pte_access & ACC_EXEC_MASK)
> spte |= shadow_x_mask;
> else
> @@ -390,6 +387,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
> u64 spte = SPTE_MMU_PRESENT_MASK;
> 
> spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
> + PT_PRESENT_MASK /* or VMX_EPT_READABLE_MASK */ |
> shadow_user_mask | shadow_x_mask | shadow_me_value;
> 
> if (ad_disabled)
> @@ -490,18 +488,16 @@ void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
> }
> EXPORT_SYMBOL_GPL(kvm_mmu_set_me_spte_mask);

What kernel version were you doing this against?

git am is giving me grief as I get a failed to apply because this should be 
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_me_spte_mask);

This was there since 6.18: https://github.com/torvalds/linux/commit/6b36119b94d0b2bb8cea9d512017efafd461d6ac

> 
> -void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
> +void kvm_mmu_set_ept_masks(bool has_ad_bits)
> {
> kvm_ad_enabled = has_ad_bits;
> 
> - shadow_user_mask = VMX_EPT_READABLE_MASK;
> + shadow_user_mask = 0;
> shadow_accessed_mask = VMX_EPT_ACCESS_BIT;
> shadow_dirty_mask = VMX_EPT_DIRTY_BIT;
> shadow_nx_mask = 0ull;
> shadow_x_mask = VMX_EPT_EXECUTABLE_MASK;
> - /* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */
> - shadow_present_mask =
> - (has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT;
> + shadow_present_mask = VMX_EPT_SUPPRESS_VE_BIT;
> 
> shadow_acc_track_mask = VMX_EPT_RWX_MASK;
> shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE;
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 3d77755b6b10..0c305f2f4ba0 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -52,10 +52,11 @@ static_assert(SPTE_TDP_AD_ENABLED == 0);
> #define SPTE_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
> #endif
> 
> -#define ACC_EXEC_MASK    1
> +#define ACC_READ_MASK    PT_PRESENT_MASK
> #define ACC_WRITE_MASK   PT_WRITABLE_MASK
> #define ACC_USER_MASK    PT_USER_MASK
> -#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
> +#define ACC_EXEC_MASK    8
> +#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
> 
> #define SPTE_LEVEL_BITS 9
> #define SPTE_LEVEL_SHIFT(level) __PT_LEVEL_SHIFT(level, SPTE_LEVEL_BITS)
> diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
> index 5316c27f6099..3bda6a621d8a 100644
> --- a/arch/x86/kvm/vmx/capabilities.h
> +++ b/arch/x86/kvm/vmx/capabilities.h
> @@ -288,11 +288,6 @@ static inline bool cpu_has_vmx_flexpriority(void)
> cpu_has_vmx_virtualize_apic_accesses();
> }
> 
> -static inline bool cpu_has_vmx_ept_execute_only(void)
> -{
> - return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
> -}
> -
> static inline bool cpu_has_vmx_ept_4levels(void)
> {
> return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
> diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
> index adf925500b9e..1afbf272efae 100644
> --- a/arch/x86/kvm/vmx/common.h
> +++ b/arch/x86/kvm/vmx/common.h
> @@ -85,11 +85,8 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
> {
> u64 error_code;
> 
> - /* Is it a read fault? */
> - error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
> -     ? PFERR_USER_MASK : 0;
> /* Is it a write fault? */
> - error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
> + error_code = (exit_qualification & EPT_VIOLATION_ACC_WRITE)
>      ? PFERR_WRITE_MASK : 0;
> /* Is it a fetch fault? */
> error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 2e687761aeaf..98801c408b8c 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -8425,8 +8425,7 @@ __init int vmx_hardware_setup(void)
> set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
> 
> if (enable_ept)
> - kvm_mmu_set_ept_masks(enable_ept_ad_bits,
> -      cpu_has_vmx_ept_execute_only());
> + kvm_mmu_set_ept_masks(enable_ept_ad_bits);
> else
> vt_x86_ops.get_mt_mask = NULL;
> 
> -- 
> 2.52.0
> 

Re: [PATCH 08/22] KVM: x86/mmu: introduce ACC_READ_MASK
Posted by Jon Kohler 1 week, 4 days ago

> On Mar 20, 2026, at 8:09 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> Read permissions so far were only needed for EPT, which does not need
> ACC_USER_MASK.  Therefore, for EPT page tables ACC_USER_MASK was repurposed
> as a read permission bit.
> 
> In order to implement nested MBEC, EPT will genuinely have four kinds of
> accesses, and there will be no room for such hacks; bite the bullet at
> last, enlarging ACC_ALL to four bits and permissions[] to 2^4 bits (u16).
> 
> The new code does not enforce that the XWR bits on non-execonly processors
> have their R bit set, even when running nested: none of the shadow_*_mask
> values have bit 0 set, and make_spte() genuinely relies on ACC_READ_MASK
> being requested!  This works becase, if execonly is not supported by the
> processor, shadow EPT will generate an EPT misconfig vmexit if the XWR
> bits represent a non-readable page, and therefore the pte_access argument
> to make_spte() will also always have ACC_READ_MASK set.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> arch/x86/include/asm/kvm_host.h | 12 +++++-----
> arch/x86/kvm/mmu.h              |  2 +-
> arch/x86/kvm/mmu/mmu.c          | 39 +++++++++++++++++++++------------
> arch/x86/kvm/mmu/mmutrace.h     |  3 ++-
> arch/x86/kvm/mmu/paging_tmpl.h  | 21 +++++++++---------
> arch/x86/kvm/mmu/spte.c         | 18 ++++++---------
> arch/x86/kvm/mmu/spte.h         |  5 +++--
> arch/x86/kvm/vmx/capabilities.h |  5 -----
> arch/x86/kvm/vmx/common.h       |  5 +----
> arch/x86/kvm/vmx/vmx.c          |  3 +--
> 10 files changed, 56 insertions(+), 57 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 871c7ff4fb29..3efb238c683c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -317,11 +317,11 @@ struct kvm_kernel_irq_routing_entry;
>  * the number of unique SPs that can theoretically be created is 2^n, where n
>  * is the number of bits that are used to compute the role.
>  *
> - * But, even though there are 20 bits in the mask below, not all combinations
> + * But, even though there are 21 bits in the mask below, not all combinations
>  * of modes and flags are possible:
>  *
>  *   - invalid shadow pages are not accounted, mirror pages are not shadowed,
> - *     so the bits are effectively 18.
> + *     so the bits are effectively 19.
>  *
>  *   - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
>  *     execonly and ad_disabled are only used for nested EPT which has
> @@ -336,7 +336,7 @@ struct kvm_kernel_irq_routing_entry;
>  *     cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
>  *
>  * Therefore, the maximum number of possible upper-level shadow pages for a
> - * single gfn is a bit less than 2^13.
> + * single gfn is a bit less than 2^14.
>  */
> union kvm_mmu_page_role {
> u32 word;
> @@ -345,7 +345,7 @@ union kvm_mmu_page_role {
> unsigned has_4_byte_gpte:1;
> unsigned quadrant:2;
> unsigned direct:1;
> - unsigned access:3;
> + unsigned access:4;
> unsigned invalid:1;
> unsigned efer_nx:1;
> unsigned cr0_wp:1;
> @@ -355,7 +355,7 @@ union kvm_mmu_page_role {
> unsigned guest_mode:1;
> unsigned passthrough:1;
> unsigned is_mirror:1;
> - unsigned :4;
> + unsigned :3;

checkpatch.pl complains about the new line:

ERROR: space prohibited before that ':' (ctx:WxV)
#78: FILE: arch/x86/include/asm/kvm_host.h:360:
+               unsigned :3;
                         ^

The space before the ':' is carried over from the pre-existing
"unsigned :4;" that this replaces, so it is arguably existing style in
this union — but since the line is being rewritten anyway, spelling it
"unsigned:3;" would keep checkpatch quiet.

> 
> /*
> * This is left at the top of the word so that
> @@ -481,7 +481,7 @@ struct kvm_mmu {
> * Byte index: page fault error code [4:1]
> * Bit index: pte permissions in ACC_* format
> */
> - u8 permissions[16];
> + u16 permissions[16];
> 
> u64 *pae_root;
> u64 *pml4_root;
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index b4b6860ab971..f5d35f66750b 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -81,7 +81,7 @@ u8 kvm_mmu_get_max_tdp_level(void);
> void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
> void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value);
> void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
> -void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
> +void kvm_mmu_set_ept_masks(bool has_ad_bits);
> 
> void kvm_init_mmu(struct kvm_vcpu *vcpu);
> void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 84351df8a9cb..b87dbf9e42b9 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2029,7 +2029,7 @@ static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
> */
> const union kvm_mmu_page_role sync_role_ign = {
> .level = 0xf,
> - .access = 0x7,
> + .access = ACC_ALL,
> .quadrant = 0x3,
> .passthrough = 0x1,
> };
> @@ -5426,7 +5426,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly)
>  * update_permission_bitmask() builds what is effectively a
>  * two-dimensional array of bools.  The second dimension is
>  * provided by individual bits of permissions[pfec >> 1], and
> - * logical &, | and ~ operations operate on all the 8 possible
> + * logical &, | and ~ operations operate on all the 16 possible
>  * combinations of ACC_* bits.
>  */
> #define ACC_BITS_MASK(access) \
> @@ -5436,15 +5436,24 @@ reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly)
> (4 & (access) ? 1 << 4 : 0) | \
> (5 & (access) ? 1 << 5 : 0) | \
> (6 & (access) ? 1 << 6 : 0) | \
> - (7 & (access) ? 1 << 7 : 0))
> + (7 & (access) ? 1 << 7 : 0) | \
> + (8 & (access) ? 1 << 8 : 0) | \
> + (9 & (access) ? 1 << 9 : 0) | \
> + (10 & (access) ? 1 << 10 : 0) | \
> + (11 & (access) ? 1 << 11 : 0) | \
> + (12 & (access) ? 1 << 12 : 0) | \
> + (13 & (access) ? 1 << 13 : 0) | \
> + (14 & (access) ? 1 << 14 : 0) | \
> + (15 & (access) ? 1 << 15 : 0))
> 
> static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
> {
> unsigned byte;
> 
> - const u8 x = ACC_BITS_MASK(ACC_EXEC_MASK);
> - const u8 w = ACC_BITS_MASK(ACC_WRITE_MASK);
> - const u8 u = ACC_BITS_MASK(ACC_USER_MASK);
> + const u16 x = ACC_BITS_MASK(ACC_EXEC_MASK);
> + const u16 w = ACC_BITS_MASK(ACC_WRITE_MASK);
> + const u16 u = ACC_BITS_MASK(ACC_USER_MASK);
> + const u16 r = ACC_BITS_MASK(ACC_READ_MASK);
> 
> bool cr4_smep = is_cr4_smep(mmu);
> bool cr4_smap = is_cr4_smap(mmu);
> @@ -5467,24 +5476,26 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
> unsigned pfec = byte << 1;
> 
> /*
> - * Each "*f" variable has a 1 bit for each UWX value
> + * Each "*f" variable has a 1 bit for each ACC_* combo
> * that causes a fault with the given PFEC.
> */
> 
> + /* Faults from reads to non-readable pages */
> + u16 rf = (pfec & (PFERR_WRITE_MASK|PFERR_FETCH_MASK)) ? 0 : (u16)~r;
> /* Faults from writes to non-writable pages */
> - u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0;
> + u16 wf = (pfec & PFERR_WRITE_MASK) ? (u16)~w : 0;
> /* Faults from user mode accesses to supervisor pages */
> - u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
> + u16 uf = (pfec & PFERR_USER_MASK) ? (u16)~u : 0;
> /* Faults from fetches of non-executable pages*/
> - u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
> + u16 ff = (pfec & PFERR_FETCH_MASK) ? (u16)~x : 0;
> /* Faults from kernel mode fetches of user pages */
> - u8 smepf = 0;
> + u16 smepf = 0;
> /* Faults from kernel mode accesses of user pages */
> - u8 smapf = 0;
> + u16 smapf = 0;
> 
> if (!ept) {
> /* Faults from kernel mode accesses to user pages */
> - u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
> + u16 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
> 
> /* Not really needed: !nx will cause pte.nx to fault */
> if (!efer_nx)
> @@ -5517,7 +5528,7 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
> smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf;
> }
> 
> - mmu->permissions[byte] = ff | uf | wf | smepf | smapf;
> + mmu->permissions[byte] = ff | uf | wf | rf | smepf | smapf;
> }
> }
> 
> diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
> index f35a830ce469..44545f6f860a 100644
> --- a/arch/x86/kvm/mmu/mmutrace.h
> +++ b/arch/x86/kvm/mmu/mmutrace.h
> @@ -25,7 +25,8 @@
> #define KVM_MMU_PAGE_PRINTK() ({        \
> const char *saved_ptr = trace_seq_buffer_ptr(p); \
> static const char *access_str[] = {        \
> - "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"  \
> + "----", "r---", "-w--", "rw--", "--u-", "r-u-", "-wu-", "rwu-", \
> + "---x", "r--x", "-w-x", "rw-x", "--ux", "r-ux", "-wux", "rwux" \
> };        \
> union kvm_mmu_page_role role;        \
>        \
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index ed762bb4b007..bbdbf4ae2d65 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -170,25 +170,24 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
> return true;
> }
> 
> -/*
> - * For PTTYPE_EPT, a page table can be executable but not readable
> - * on supported processors. Therefore, set_spte does not automatically
> - * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
> - * to signify readability since it isn't used in the EPT case
> - */
> static inline unsigned FNAME(gpte_access)(u64 gpte)
> {
> unsigned access;
> #if PTTYPE == PTTYPE_EPT
> access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
> ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
> - ((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
> + ((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0);
> #else
> - BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
> - BUILD_BUG_ON(ACC_EXEC_MASK != 1);
> + /*
> + * P is set here, so the page is always readable and W/U/!NX represent
> + * allowed accesses.
> + */
> + BUILD_BUG_ON(ACC_READ_MASK != PT_PRESENT_MASK);
> + BUILD_BUG_ON(ACC_WRITE_MASK != PT_WRITABLE_MASK);
> + BUILD_BUG_ON(ACC_USER_MASK != PT_USER_MASK);
> + BUILD_BUG_ON(ACC_EXEC_MASK & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK));
> access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
> - /* Combine NX with P (which is set here) to get ACC_EXEC_MASK.  */
> - access ^= (gpte >> PT64_NX_SHIFT);
> + access |= gpte & PT64_NX_MASK ? 0 : ACC_EXEC_MASK;
> #endif
> 
> return access;
> diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
> index e2acd9ed9dba..0b09124b0d54 100644
> --- a/arch/x86/kvm/mmu/spte.c
> +++ b/arch/x86/kvm/mmu/spte.c
> @@ -194,12 +194,6 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> int is_host_mmio = -1;
> bool wrprot = false;
> 
> - /*
> - * For the EPT case, shadow_present_mask has no RWX bits set if
> - * exec-only page table entries are supported.  In that case,
> - * ACC_USER_MASK and shadow_user_mask are used to represent
> - * read access.  See FNAME(gpte_access) in paging_tmpl.h.
> - */
> WARN_ON_ONCE((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE);
> 
> if (sp->role.ad_disabled)
> @@ -228,6 +222,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> pte_access &= ~ACC_EXEC_MASK;
> }
> 
> + if (pte_access & ACC_READ_MASK)
> + spte |= PT_PRESENT_MASK; /* or VMX_EPT_READABLE_MASK */
> +
> if (pte_access & ACC_EXEC_MASK)
> spte |= shadow_x_mask;
> else
> @@ -390,6 +387,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
> u64 spte = SPTE_MMU_PRESENT_MASK;
> 
> spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
> + PT_PRESENT_MASK /* or VMX_EPT_READABLE_MASK */ |
> shadow_user_mask | shadow_x_mask | shadow_me_value;
> 
> if (ad_disabled)
> @@ -490,18 +488,16 @@ void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
> }
> EXPORT_SYMBOL_GPL(kvm_mmu_set_me_spte_mask);
> 
> -void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
> +void kvm_mmu_set_ept_masks(bool has_ad_bits)
> {
> kvm_ad_enabled = has_ad_bits;
> 
> - shadow_user_mask = VMX_EPT_READABLE_MASK;
> + shadow_user_mask = 0;
> shadow_accessed_mask = VMX_EPT_ACCESS_BIT;
> shadow_dirty_mask = VMX_EPT_DIRTY_BIT;
> shadow_nx_mask = 0ull;
> shadow_x_mask = VMX_EPT_EXECUTABLE_MASK;
> - /* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */
> - shadow_present_mask =
> - (has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT;
> + shadow_present_mask = VMX_EPT_SUPPRESS_VE_BIT;
> 
> shadow_acc_track_mask = VMX_EPT_RWX_MASK;
> shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE;
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 3d77755b6b10..0c305f2f4ba0 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -52,10 +52,11 @@ static_assert(SPTE_TDP_AD_ENABLED == 0);
> #define SPTE_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
> #endif
> 
> -#define ACC_EXEC_MASK    1
> +#define ACC_READ_MASK    PT_PRESENT_MASK
> #define ACC_WRITE_MASK   PT_WRITABLE_MASK
> #define ACC_USER_MASK    PT_USER_MASK
> -#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
> +#define ACC_EXEC_MASK    8
> +#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
> 
> #define SPTE_LEVEL_BITS 9
> #define SPTE_LEVEL_SHIFT(level) __PT_LEVEL_SHIFT(level, SPTE_LEVEL_BITS)
> diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
> index 5316c27f6099..3bda6a621d8a 100644
> --- a/arch/x86/kvm/vmx/capabilities.h
> +++ b/arch/x86/kvm/vmx/capabilities.h
> @@ -288,11 +288,6 @@ static inline bool cpu_has_vmx_flexpriority(void)
> cpu_has_vmx_virtualize_apic_accesses();
> }
> 
> -static inline bool cpu_has_vmx_ept_execute_only(void)
> -{
> - return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
> -}
> -
> static inline bool cpu_has_vmx_ept_4levels(void)
> {
> return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
> diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
> index adf925500b9e..1afbf272efae 100644
> --- a/arch/x86/kvm/vmx/common.h
> +++ b/arch/x86/kvm/vmx/common.h
> @@ -85,11 +85,8 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
> {
> u64 error_code;
> 
> - /* Is it a read fault? */
> - error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
> -     ? PFERR_USER_MASK : 0;
> /* Is it a write fault? */
> - error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
> + error_code = (exit_qualification & EPT_VIOLATION_ACC_WRITE)
>      ? PFERR_WRITE_MASK : 0;
> /* Is it a fetch fault? */
> error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 2e687761aeaf..98801c408b8c 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -8425,8 +8425,7 @@ __init int vmx_hardware_setup(void)
> set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
> 
> if (enable_ept)
> - kvm_mmu_set_ept_masks(enable_ept_ad_bits,
> -      cpu_has_vmx_ept_execute_only());
> + kvm_mmu_set_ept_masks(enable_ept_ad_bits);
> else
> vt_x86_ops.get_mt_mask = NULL;
> 
> -- 
> 2.52.0
>