[PATCH 13/22] KVM: x86/mmu: add support for nested MBEC

Paolo Bonzini posted 22 patches 2 weeks ago
There is a newer version of this series
[PATCH 13/22] KVM: x86/mmu: add support for nested MBEC
Posted by Paolo Bonzini 2 weeks ago
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/paging_tmpl.h | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index c657ea90bb33..d50085308506 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -124,12 +124,17 @@ static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *acce
 	*access &= mask;
 }
 
-static inline int FNAME(is_present_gpte)(unsigned long pte)
+static inline int FNAME(is_present_gpte)(struct kvm_mmu *mmu,
+					 unsigned long pte)
 {
 #if PTTYPE != PTTYPE_EPT
 	return pte & PT_PRESENT_MASK;
 #else
-	return pte & 7;
+	/*
+	 * For EPT, an entry is present if any of bits 2:0 are set.
+	 * With mode-based execute control, bit 10 also indicates presence.
+	 */
+	return pte & (7 | (mmu_has_mbec(mmu) ? VMX_EPT_USER_EXECUTABLE_MASK : 0));
 #endif
 }
 
@@ -152,7 +157,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *sp, u64 *spte,
 				  u64 gpte)
 {
-	if (!FNAME(is_present_gpte)(gpte))
+	if (!FNAME(is_present_gpte)(vcpu->arch.mmu, gpte))
 		goto no_present;
 
 	/* Prefetch only accessed entries (unless A/D bits are disabled). */
@@ -173,14 +178,17 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 static inline unsigned FNAME(gpte_access)(u64 gpte)
 {
 	unsigned access;
-#if PTTYPE == PTTYPE_EPT
 	/*
-	 * For now nested MBEC is not supported and permission_fault() ignores
-	 * ACC_USER_EXEC_MASK.
+	 * Set bits in ACC_*_MASK even if they might not be used in the
+	 * actual checks.  For example, if EFER.NX is clear permission_fault()
+	 * will ignore ACC_EXEC_MASK, and if MBEC is disabled it will
+	 * ignore ACC_USER_EXEC_MASK.
 	 */
+#if PTTYPE == PTTYPE_EPT
 	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
 		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
-		((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0);
+		((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0) |
+		((gpte & VMX_EPT_USER_EXECUTABLE_MASK) ? ACC_USER_EXEC_MASK : 0);
 #else
 	/*
 	 * P is set here, so the page is always readable and W/U/!NX represent
@@ -335,7 +343,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	if (walker->level == PT32E_ROOT_LEVEL) {
 		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
 		trace_kvm_mmu_paging_element(pte, walker->level);
-		if (!FNAME(is_present_gpte)(pte))
+		if (!FNAME(is_present_gpte)(mmu, pte))
 			goto error;
 		--walker->level;
 	}
@@ -417,7 +425,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 		 */
 		pte_access = pt_access & (pte ^ walk_nx_mask);
 
-		if (unlikely(!FNAME(is_present_gpte)(pte)))
+		if (unlikely(!FNAME(is_present_gpte)(mmu, pte)))
 			goto error;
 
 		if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) {
@@ -514,6 +522,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 		 * ACC_*_MASK flags!
 		 */
 		walker->fault.exit_qualification |= EPT_VIOLATION_RWX_TO_PROT(pte_access);
+		if (mmu_has_mbec(mmu))
+			walker->fault.exit_qualification |=
+				EPT_VIOLATION_USER_EXEC_TO_PROT(pte_access);
 	}
 #endif
 	walker->fault.address = addr;
-- 
2.52.0
Re: [PATCH 13/22] KVM: x86/mmu: add support for nested MBEC
Posted by Jon Kohler 1 week, 4 days ago

> On Mar 20, 2026, at 8:09 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 

Is the body of the commit message missing? The patch has only a Signed-off-by line and no description.

> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> arch/x86/kvm/mmu/paging_tmpl.h | 29 ++++++++++++++++++++---------
> 1 file changed, 20 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index c657ea90bb33..d50085308506 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -124,12 +124,17 @@ static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *acce
> *access &= mask;
> }
> 
> -static inline int FNAME(is_present_gpte)(unsigned long pte)
> +static inline int FNAME(is_present_gpte)(struct kvm_mmu *mmu,
> + unsigned long pte)
> {
> #if PTTYPE != PTTYPE_EPT
> return pte & PT_PRESENT_MASK;
> #else
> - return pte & 7;
> + /*
> + * For EPT, an entry is present if any of bits 2:0 are set.
> + * With mode-based execute control, bit 10 also indicates presence.
> + */
> + return pte & (7 | (mmu_has_mbec(mmu) ? VMX_EPT_USER_EXECUTABLE_MASK : 0));
> #endif
> }
> 
> @@ -152,7 +157,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
>  struct kvm_mmu_page *sp, u64 *spte,
>  u64 gpte)
> {
> - if (!FNAME(is_present_gpte)(gpte))
> + if (!FNAME(is_present_gpte)(vcpu->arch.mmu, gpte))
> goto no_present;
> 
> /* Prefetch only accessed entries (unless A/D bits are disabled). */
> @@ -173,14 +178,17 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
> static inline unsigned FNAME(gpte_access)(u64 gpte)
> {
> unsigned access;
> -#if PTTYPE == PTTYPE_EPT
> /*
> - * For now nested MBEC is not supported and permission_fault() ignores
> - * ACC_USER_EXEC_MASK.
> + * Set bits in ACC_*_MASK even if they might not be used in the
> + * actual checks.  For example, if EFER.NX is clear permission_fault()
> + * will ignore ACC_EXEC_MASK, and if MBEC is disabled it will
> + * ignore ACC_USER_EXEC_MASK.
> */
> +#if PTTYPE == PTTYPE_EPT
> access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
> ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
> - ((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0);
> + ((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0) |
> + ((gpte & VMX_EPT_USER_EXECUTABLE_MASK) ? ACC_USER_EXEC_MASK : 0);
> #else
> /*
> * P is set here, so the page is always readable and W/U/!NX represent
> @@ -335,7 +343,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> if (walker->level == PT32E_ROOT_LEVEL) {
> pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
> trace_kvm_mmu_paging_element(pte, walker->level);
> - if (!FNAME(is_present_gpte)(pte))
> + if (!FNAME(is_present_gpte)(mmu, pte))
> goto error;
> --walker->level;
> }
> @@ -417,7 +425,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> */
> pte_access = pt_access & (pte ^ walk_nx_mask);
> 
> - if (unlikely(!FNAME(is_present_gpte)(pte)))
> + if (unlikely(!FNAME(is_present_gpte)(mmu, pte)))
> goto error;
> 
> if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) {
> @@ -514,6 +522,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> * ACC_*_MASK flags!
> */
> walker->fault.exit_qualification |= EPT_VIOLATION_RWX_TO_PROT(pte_access);
> + if (mmu_has_mbec(mmu))
> + walker->fault.exit_qualification |=
> + EPT_VIOLATION_USER_EXEC_TO_PROT(pte_access);
> }
> #endif
> walker->fault.address = addr;
> -- 
> 2.52.0
>