[PATCH 10/22] KVM: x86/mmu: split XS/XU bits for MBEC

Paolo Bonzini posted 22 patches 2 weeks ago
There is a newer version of this series
[PATCH 10/22] KVM: x86/mmu: split XS/XU bits for MBEC
Posted by Paolo Bonzini 2 weeks ago
When EPT is in use, replace ACC_USER_MASK with ACC_USER_EXEC_MASK,
so that supervisor and user-mode execution can be controlled
independently (ACC_USER_MASK would not allow a setting similar to
XU=0 XS=1 W=1 R=1).

Replace shadow_x_mask with shadow_xs_mask/shadow_xu_mask, to allow
setting XS and XU bits separately in EPT entries.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/vmx.h     |  1 +
 arch/x86/kvm/mmu/mmu.c         | 15 ++++++++---
 arch/x86/kvm/mmu/mmutrace.h    |  6 ++---
 arch/x86/kvm/mmu/paging_tmpl.h |  4 +++
 arch/x86/kvm/mmu/spte.c        | 47 ++++++++++++++++++++++------------
 arch/x86/kvm/mmu/spte.h        |  8 +++---
 6 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 4a0804cc7c82..0041f8a77447 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -538,6 +538,7 @@ enum vmcs_field {
 #define VMX_EPT_IPAT_BIT    			(1ull << 6)
 #define VMX_EPT_ACCESS_BIT			(1ull << 8)
 #define VMX_EPT_DIRTY_BIT			(1ull << 9)
+#define VMX_EPT_USER_EXECUTABLE_MASK		(1ull << 10)
 #define VMX_EPT_SUPPRESS_VE_BIT			(1ull << 63)
 #define VMX_EPT_RWX_MASK                        (VMX_EPT_READABLE_MASK |       \
 						 VMX_EPT_WRITABLE_MASK |       \
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b7366e416baa..254d69c4b9f3 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5371,7 +5371,7 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 static inline bool boot_cpu_is_amd(void)
 {
 	WARN_ON_ONCE(!tdp_enabled);
-	return shadow_x_mask == 0;
+	return shadow_xs_mask == 0;
 }
 
 /*
@@ -5450,7 +5450,6 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 {
 	unsigned byte;
 
-	const u16 x = ACC_BITS_MASK(ACC_EXEC_MASK);
 	const u16 w = ACC_BITS_MASK(ACC_WRITE_MASK);
 	const u16 r = ACC_BITS_MASK(ACC_READ_MASK);
 
@@ -5491,8 +5490,18 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 		u16 smapf = 0;
 
 		if (ept) {
-			ff = (pfec & PFERR_FETCH_MASK) ? (u16)~x : 0;
+			const u16 xs = ACC_BITS_MASK(ACC_EXEC_MASK);
+			const u16 xu = ACC_BITS_MASK(ACC_USER_EXEC_MASK);
+
+			if (pfec & PFERR_FETCH_MASK) {
+				/* Ignore XU unless MBEC is enabled.  */
+				if (cr4_smep)
+					ff = pfec & PFERR_USER_MASK ? (u16)~xu : (u16)~xs;
+				else
+					ff = (u16)~xs;
+			}
 		} else {
+			const u16 x = ACC_BITS_MASK(ACC_EXEC_MASK);
 			const u16 u = ACC_BITS_MASK(ACC_USER_MASK);
 
 			/* Faults from kernel mode accesses to user pages */
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index 44545f6f860a..e22588d3e145 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -354,8 +354,8 @@ TRACE_EVENT(
 		__entry->sptep = virt_to_phys(sptep);
 		__entry->level = level;
 		__entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
-		__entry->x = is_executable_pte(__entry->spte);
-		__entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
+		__entry->x = (__entry->spte & (shadow_xs_mask | shadow_nx_mask)) == shadow_xs_mask;
+		__entry->u = !!(__entry->spte & (shadow_xu_mask | shadow_user_mask));
 	),
 
 	TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
@@ -363,7 +363,7 @@ TRACE_EVENT(
 		  __entry->r ? "r" : "-",
 		  __entry->spte & PT_WRITABLE_MASK ? "w" : "-",
 		  __entry->x ? "x" : "-",
-		  __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
+		  __entry->u ? "u" : "-",
 		  __entry->level, __entry->sptep
 	)
 );
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index bbdbf4ae2d65..c657ea90bb33 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -174,6 +174,10 @@ static inline unsigned FNAME(gpte_access)(u64 gpte)
 {
 	unsigned access;
 #if PTTYPE == PTTYPE_EPT
+	/*
+	 * For now nested MBEC is not supported and permission_fault() ignores
+	 * ACC_USER_EXEC_MASK.
+	 */
 	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
 		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
 		((gpte & VMX_EPT_READABLE_MASK) ? ACC_READ_MASK : 0);
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 0b09124b0d54..0b3e2b97afbf 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -29,8 +29,9 @@ bool __read_mostly kvm_ad_enabled;
 u64 __read_mostly shadow_host_writable_mask;
 u64 __read_mostly shadow_mmu_writable_mask;
 u64 __read_mostly shadow_nx_mask;
-u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 u64 __read_mostly shadow_user_mask;
+u64 __read_mostly shadow_xs_mask; /* mutually exclusive with nx_mask and user_mask */
+u64 __read_mostly shadow_xu_mask; /* mutually exclusive with nx_mask and user_mask */
 u64 __read_mostly shadow_accessed_mask;
 u64 __read_mostly shadow_dirty_mask;
 u64 __read_mostly shadow_mmio_value;
@@ -216,22 +217,30 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 * when CR0.PG is toggled, but leveraging that to ignore the mitigation
 	 * would tie make_spte() further to vCPU/MMU state, and add complexity
 	 * just to optimize a mode that is anything but performance critical.
+	 *
+	 * Use ACC_USER_EXEC_MASK here assuming only Intel processors (EPT)
+	 * are affected by the NX huge page erratum.
 	 */
-	if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
+	if (level > PG_LEVEL_4K &&
+	    (pte_access & (ACC_EXEC_MASK | ACC_USER_EXEC_MASK)) &&
 	    is_nx_huge_page_enabled(vcpu->kvm)) {
-		pte_access &= ~ACC_EXEC_MASK;
+		pte_access &= ~(ACC_EXEC_MASK | ACC_USER_EXEC_MASK);
 	}
 
 	if (pte_access & ACC_READ_MASK)
 		spte |= PT_PRESENT_MASK; /* or VMX_EPT_READABLE_MASK */
 
-	if (pte_access & ACC_EXEC_MASK)
-		spte |= shadow_x_mask;
-	else
-		spte |= shadow_nx_mask;
-
-	if (pte_access & ACC_USER_MASK)
-		spte |= shadow_user_mask;
+	if (shadow_nx_mask) {
+		if (!(pte_access & ACC_EXEC_MASK))
+			spte |= shadow_nx_mask;
+		if (pte_access & ACC_USER_MASK)
+			spte |= shadow_user_mask;
+	} else {
+		if (pte_access & ACC_EXEC_MASK)
+			spte |= shadow_xs_mask;
+		if (pte_access & ACC_USER_EXEC_MASK)
+			spte |= shadow_xu_mask;
+	}
 
 	if (level > PG_LEVEL_4K)
 		spte |= PT_PAGE_SIZE_MASK;
@@ -317,11 +326,13 @@ static u64 modify_spte_protections(u64 spte, u64 set, u64 clear)
 static u64 make_spte_executable(u64 spte, u8 access)
 {
 	u64 set, clear;
-	if (access & ACC_EXEC_MASK)
-		set = shadow_x_mask;
+	if (shadow_nx_mask)
+		set = (access & ACC_EXEC_MASK) ? 0 : shadow_nx_mask;
 	else
-		set = shadow_nx_mask;
-	clear = set ^ (shadow_nx_mask | shadow_x_mask);
+		set =
+			(access & ACC_EXEC_MASK ? shadow_xs_mask : 0) |
+			(access & ACC_USER_EXEC_MASK ? shadow_xu_mask : 0);
+	clear = set ^ (shadow_nx_mask | shadow_xs_mask | shadow_xu_mask);
 	return modify_spte_protections(spte, set, clear);
 }
 
@@ -388,7 +399,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
 
 	spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
 		PT_PRESENT_MASK /* or VMX_EPT_READABLE_MASK */ |
-		shadow_user_mask | shadow_x_mask | shadow_me_value;
+		shadow_user_mask | shadow_xs_mask | shadow_xu_mask | shadow_me_value;
 
 	if (ad_disabled)
 		spte |= SPTE_TDP_AD_DISABLED;
@@ -496,7 +507,8 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits)
 	shadow_accessed_mask	= VMX_EPT_ACCESS_BIT;
 	shadow_dirty_mask	= VMX_EPT_DIRTY_BIT;
 	shadow_nx_mask		= 0ull;
-	shadow_x_mask		= VMX_EPT_EXECUTABLE_MASK;
+	shadow_xs_mask		= VMX_EPT_EXECUTABLE_MASK;
+	shadow_xu_mask		= VMX_EPT_EXECUTABLE_MASK;
 	shadow_present_mask	= VMX_EPT_SUPPRESS_VE_BIT;
 
 	shadow_acc_track_mask	= VMX_EPT_RWX_MASK;
@@ -547,7 +559,8 @@ void kvm_mmu_reset_all_pte_masks(void)
 	shadow_accessed_mask	= PT_ACCESSED_MASK;
 	shadow_dirty_mask	= PT_DIRTY_MASK;
 	shadow_nx_mask		= PT64_NX_MASK;
-	shadow_x_mask		= 0;
+	shadow_xs_mask		= 0;
+	shadow_xu_mask		= 0;
 	shadow_present_mask	= PT_PRESENT_MASK;
 
 	shadow_acc_track_mask	= 0;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 0c305f2f4ba0..7323ff19056b 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -54,7 +54,8 @@ static_assert(SPTE_TDP_AD_ENABLED == 0);
 
 #define ACC_READ_MASK    PT_PRESENT_MASK
 #define ACC_WRITE_MASK   PT_WRITABLE_MASK
-#define ACC_USER_MASK    PT_USER_MASK
+#define ACC_USER_MASK    PT_USER_MASK   /* non EPT */
+#define ACC_USER_EXEC_MASK ACC_USER_MASK /* EPT only */
 #define ACC_EXEC_MASK    8
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
 
@@ -184,8 +185,9 @@ extern bool __read_mostly kvm_ad_enabled;
 extern u64 __read_mostly shadow_host_writable_mask;
 extern u64 __read_mostly shadow_mmu_writable_mask;
 extern u64 __read_mostly shadow_nx_mask;
-extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 extern u64 __read_mostly shadow_user_mask;
+extern u64 __read_mostly shadow_xs_mask; /* mutually exclusive with nx_mask and user_mask */
+extern u64 __read_mostly shadow_xu_mask; /* mutually exclusive with nx_mask and user_mask */
 extern u64 __read_mostly shadow_accessed_mask;
 extern u64 __read_mostly shadow_dirty_mask;
 extern u64 __read_mostly shadow_mmio_value;
@@ -352,7 +354,7 @@ static inline bool is_last_spte(u64 pte, int level)
 
 static inline bool is_executable_pte(u64 spte)
 {
-	return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask;
+	return (spte & (shadow_xs_mask | shadow_xu_mask | shadow_nx_mask)) != shadow_nx_mask;
 }
 
 static inline kvm_pfn_t spte_to_pfn(u64 pte)
-- 
2.52.0
Re: [PATCH 10/22] KVM: x86/mmu: split XS/XU bits for MBEC
Posted by Huang, Kai 1 week, 3 days ago
On Sat, 2026-03-21 at 01:09 +0100, Paolo Bonzini wrote:
> When EPT is in use, replace ACC_USER_MASK with ACC_USER_EXEC_MASK,
> so that supervisor and user-mode execution can be controlled
> independently (ACC_USER_MASK would not allow a setting similar to
> XU=0 XS=1 W=1 R=1).
> 
> Replace shadow_x_mask with shadow_xs_mask/shadow_xu_mask, to allow
> setting XS and XU bits separately in EPT entries.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/include/asm/vmx.h     |  1 +
>  arch/x86/kvm/mmu/mmu.c         | 15 ++++++++---
>  arch/x86/kvm/mmu/mmutrace.h    |  6 ++---
>  arch/x86/kvm/mmu/paging_tmpl.h |  4 +++
>  arch/x86/kvm/mmu/spte.c        | 47 ++++++++++++++++++++++------------
>  arch/x86/kvm/mmu/spte.h        |  8 +++---
>  6 files changed, 55 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 4a0804cc7c82..0041f8a77447 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -538,6 +538,7 @@ enum vmcs_field {
>  #define VMX_EPT_IPAT_BIT    			(1ull << 6)
>  #define VMX_EPT_ACCESS_BIT			(1ull << 8)
>  #define VMX_EPT_DIRTY_BIT			(1ull << 9)
> +#define VMX_EPT_USER_EXECUTABLE_MASK		(1ull << 10)
>  #define VMX_EPT_SUPPRESS_VE_BIT			(1ull << 63)
>  #define VMX_EPT_RWX_MASK                        (VMX_EPT_READABLE_MASK |       \
>  						 VMX_EPT_WRITABLE_MASK |       \

Should we include VMX_EPT_USER_EXECUTABLE_MASK to VMX_EPT_RWX_MASK?


[...]

> @@ -496,7 +507,8 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits)
>  	shadow_accessed_mask	= VMX_EPT_ACCESS_BIT;
>  	shadow_dirty_mask	= VMX_EPT_DIRTY_BIT;
>  	shadow_nx_mask		= 0ull;
> -	shadow_x_mask		= VMX_EPT_EXECUTABLE_MASK;
> +	shadow_xs_mask		= VMX_EPT_EXECUTABLE_MASK;
> +	shadow_xu_mask		= VMX_EPT_EXECUTABLE_MASK;

Shouldn't 'shadow_xu_mask' be VMX_EPT_USER_EXECUTABLE_MASK?


Btw, with MBEC it's a bit weird to me that we continue to just use
110 (R=0,W=1,X=1) to trigger EPT misconfig for MMIO caching:

    /*
     * EPT Misconfigurations are generated if the value of bits 2:0        
     * of an EPT paging-structure entry is 110b (write/execute).           
     */
    kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE,
                               VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT,
			       0);

Per SDM, R=0 and W=1 is always guaranteed to trigger EPT misconfig (see
 30.3.3.1 EPT Misconfigurations).  Maybe we can just use that for MMIO
caching?

We can then remove both X and XU bit from mmio_mask too.
Re: [PATCH 10/22] KVM: x86/mmu: split XS/XU bits for MBEC
Posted by Paolo Bonzini 1 week, 3 days ago
On 3/24/26 11:45, Huang, Kai wrote:
> On Sat, 2026-03-21 at 01:09 +0100, Paolo Bonzini wrote:
>> When EPT is in use, replace ACC_USER_MASK with ACC_USER_EXEC_MASK,
>> so that supervisor and user-mode execution can be controlled
>> independently (ACC_USER_MASK would not allow a setting similar to
>> XU=0 XS=1 W=1 R=1).
>>
>> Replace shadow_x_mask with shadow_xs_mask/shadow_xu_mask, to allow
>> setting XS and XU bits separately in EPT entries.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>   arch/x86/include/asm/vmx.h     |  1 +
>>   arch/x86/kvm/mmu/mmu.c         | 15 ++++++++---
>>   arch/x86/kvm/mmu/mmutrace.h    |  6 ++---
>>   arch/x86/kvm/mmu/paging_tmpl.h |  4 +++
>>   arch/x86/kvm/mmu/spte.c        | 47 ++++++++++++++++++++++------------
>>   arch/x86/kvm/mmu/spte.h        |  8 +++---
>>   6 files changed, 55 insertions(+), 26 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>> index 4a0804cc7c82..0041f8a77447 100644
>> --- a/arch/x86/include/asm/vmx.h
>> +++ b/arch/x86/include/asm/vmx.h
>> @@ -538,6 +538,7 @@ enum vmcs_field {
>>   #define VMX_EPT_IPAT_BIT    			(1ull << 6)
>>   #define VMX_EPT_ACCESS_BIT			(1ull << 8)
>>   #define VMX_EPT_DIRTY_BIT			(1ull << 9)
>> +#define VMX_EPT_USER_EXECUTABLE_MASK		(1ull << 10)
>>   #define VMX_EPT_SUPPRESS_VE_BIT			(1ull << 63)
>>   #define VMX_EPT_RWX_MASK                        (VMX_EPT_READABLE_MASK |       \
>>   						 VMX_EPT_WRITABLE_MASK |       \
> 
> Should we include VMX_EPT_USER_EXECUTABLE_MASK to VMX_EPT_RWX_MASK?

No, because it is used for many cases to refer to bits 0-2, for example:

  #define EPT_VIOLATION_RWX_TO_PROT(__epte)
     (((__epte) & VMX_EPT_RWX_MASK) << 3)

Bit 10 is handled separately because it's not contiguous and has a 
different mapping to the exit qualification (to bit 6 instead of bit 13).

(However, there is a bug later in the series where shadow_acc_track_mask 
needs to have VMX_EPT_USER_EXECUTABLE_MASK in it).

> 
> [...]
> 
>> @@ -496,7 +507,8 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits)
>>   	shadow_accessed_mask	= VMX_EPT_ACCESS_BIT;
>>   	shadow_dirty_mask	= VMX_EPT_DIRTY_BIT;
>>   	shadow_nx_mask		= 0ull;
>> -	shadow_x_mask		= VMX_EPT_EXECUTABLE_MASK;
>> +	shadow_xs_mask		= VMX_EPT_EXECUTABLE_MASK;
>> +	shadow_xu_mask		= VMX_EPT_EXECUTABLE_MASK;
> 
> Shouldn't 'shadow_xu_mask' be VMX_EPT_USER_EXECUTABLE_MASK?

Not yet, because shadow_xu_mask is used to set executable permissions as 
well.  I suppose you could make it 0 when MBEC is disabled instead of 
VMX_EPT_EXECUTABLE_MASK, but it can only be VMX_EPT_USER_EXECUTABLE_MASK 
when MBEC is enabled.

> 
> 
> Btw, with MBEC it's a bit weird to me that we continue to just use
> 110 (R=0,W=1,X=1) to trigger EPT misconfig for MMIO caching:
> 
>      /*
>       * EPT Misconfigurations are generated if the value of bits 2:0
>       * of an EPT paging-structure entry is 110b (write/execute).
>       */
>      kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE,
>                                 VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT,
> 			       0);
> 
> Per SDM, R=0 and W=1 is always guaranteed to trigger EPT misconfig (see
>   30.3.3.1 EPT Misconfigurations).  Maybe we can just use that for MMIO
> caching?
> 
> We can then remove both X and XU bit from mmio_mask too.

Maybe but is it worth it?  (Based on this we could keep bit 10 in 
MMIO_SPTE_GEN_LOW_END, after all, because W=1 R=0 would give a 
misconfiguration independent of the value of XU; but again I'm not sure 
it's worth it).

Paolo

Re: [PATCH 10/22] KVM: x86/mmu: split XS/XU bits for MBEC
Posted by Huang, Kai 1 week, 2 days ago
On Tue, 2026-03-24 at 12:24 +0100, Paolo Bonzini wrote:
> On 3/24/26 11:45, Huang, Kai wrote:
> > On Sat, 2026-03-21 at 01:09 +0100, Paolo Bonzini wrote:
> > > When EPT is in use, replace ACC_USER_MASK with ACC_USER_EXEC_MASK,
> > > so that supervisor and user-mode execution can be controlled
> > > independently (ACC_USER_MASK would not allow a setting similar to
> > > XU=0 XS=1 W=1 R=1).
> > > 
> > > Replace shadow_x_mask with shadow_xs_mask/shadow_xu_mask, to allow
> > > setting XS and XU bits separately in EPT entries.
> > > 
> > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > > ---
> > >   arch/x86/include/asm/vmx.h     |  1 +
> > >   arch/x86/kvm/mmu/mmu.c         | 15 ++++++++---
> > >   arch/x86/kvm/mmu/mmutrace.h    |  6 ++---
> > >   arch/x86/kvm/mmu/paging_tmpl.h |  4 +++
> > >   arch/x86/kvm/mmu/spte.c        | 47 ++++++++++++++++++++++------------
> > >   arch/x86/kvm/mmu/spte.h        |  8 +++---
> > >   6 files changed, 55 insertions(+), 26 deletions(-)
> > > 
> > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> > > index 4a0804cc7c82..0041f8a77447 100644
> > > --- a/arch/x86/include/asm/vmx.h
> > > +++ b/arch/x86/include/asm/vmx.h
> > > @@ -538,6 +538,7 @@ enum vmcs_field {
> > >   #define VMX_EPT_IPAT_BIT    			(1ull << 6)
> > >   #define VMX_EPT_ACCESS_BIT			(1ull << 8)
> > >   #define VMX_EPT_DIRTY_BIT			(1ull << 9)
> > > +#define VMX_EPT_USER_EXECUTABLE_MASK		(1ull << 10)
> > >   #define VMX_EPT_SUPPRESS_VE_BIT			(1ull << 63)
> > >   #define VMX_EPT_RWX_MASK                        (VMX_EPT_READABLE_MASK |       \
> > >   						 VMX_EPT_WRITABLE_MASK |       \
> > 
> > Should we include VMX_EPT_USER_EXECUTABLE_MASK to VMX_EPT_RWX_MASK?
> 
> No, because it is used for many cases to refer to bits 0-2, for example:
> 
>   #define EPT_VIOLATION_RWX_TO_PROT(__epte)
>      (((__epte) & VMX_EPT_RWX_MASK) << 3)
> 
> Bit 10 is handled separately because it's not contiguous and has a 
> different mapping to the exit qualification (to bit 6 instead of bit 13).

OK.  It's a bit unfortunate but we can always explicitly get
EPT_VIOLATION_PROT_USER_EXEC from the VMX_EPT_USER_EXECUTABLE_MASK.

> 
> (However, there is a bug later in the series where shadow_acc_track_mask 
> needs to have VMX_EPT_USER_EXECUTABLE_MASK in it).

Right we need to track the XU bit too.

> 
> > 
> > [...]
> > 
> > > @@ -496,7 +507,8 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits)
> > >   	shadow_accessed_mask	= VMX_EPT_ACCESS_BIT;
> > >   	shadow_dirty_mask	= VMX_EPT_DIRTY_BIT;
> > >   	shadow_nx_mask		= 0ull;
> > > -	shadow_x_mask		= VMX_EPT_EXECUTABLE_MASK;
> > > +	shadow_xs_mask		= VMX_EPT_EXECUTABLE_MASK;
> > > +	shadow_xu_mask		= VMX_EPT_EXECUTABLE_MASK;
> > 
> > Shouldn't 'shadow_xu_mask' be VMX_EPT_USER_EXECUTABLE_MASK?
> 
> Not yet, because shadow_xu_mask is used to set executable permissions as 
> well.  I suppose you could make it 0 when MBEC is disabled instead of 
> VMX_EPT_EXECUTABLE_MASK, but it can only be VMX_EPT_USER_EXECUTABLE_MASK 
> when MBEC is enabled.

I see.  It's changed to the right value in a later patch which actually
turns on MBEC.

> 
> > 
> > 
> > Btw, with MBEC it's a bit weird to me that we continue to just use
> > 110 (R=0,W=1,X=1) to trigger EPT misconfig for MMIO caching:
> > 
> >      /*
> >       * EPT Misconfigurations are generated if the value of bits 2:0
> >       * of an EPT paging-structure entry is 110b (write/execute).
> >       */
> >      kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE,
> >                                 VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT,
> > 			       0);
> > 
> > Per SDM, R=0 and W=1 is always guaranteed to trigger EPT misconfig (see
> >   30.3.3.1 EPT Misconfigurations).  Maybe we can just use that for MMIO
> > caching?
> > 
> > We can then remove both X and XU bit from mmio_mask too.
> 
> Maybe but is it worth it?  (Based on this we could keep bit 10 in 
> MMIO_SPTE_GEN_LOW_END, after all, because W=1 R=0 would give a 
> misconfiguration independent of the value of XU; but again I'm not sure 
> it's worth it).

It looks promising to me since we can have a slightly clearer code (IMHO)
and one more bit for MMIO gen.  But no strong opinion :-)