Adjust the SPTE_MMIO_ALLOWED_MASK and associated values to make these
masks aware of PTE Bit 10, to be used by Intel MBEC.
Intel SDM 30.3.3.1 EPT Misconfigurations states:
An EPT misconfiguration occurs if translation of a guest-physical
address encounters an EPT paging-structure entry that meets any of
the following conditions:
- Bit 0 of the entry is clear (indicating that data reads are not
allowed) and any of the following hold:
— Bit 1 is set (indicating that data writes are allowed).
— The processor does not support execute-only translations and
either of the following hold:
- Bit 2 is set (indicating that instruction fetches are allowed)
Note: If the “mode-based execute control for EPT” VM-execution
control is 1, setting bit 2 indicates that instruction fetches
are allowed from supervisor-mode linear addresses.
- The “mode-based execute control for EPT” VM-execution control
is 1 and bit 10 is set (indicating that instruction fetches
are allowed from user-mode linear addresses).
For LKML Review:
SDM 30.3.3.1 also states that "Software should read the VMX capability
MSR IA32_VMX_EPT_VPID_CAP to determine whether execute-only
translations are supported (see Appendix A.10)." A.10 indicates that
this is specified by bit 0; if bit 0 is 1, then the processor supports
execute-only transactions by EPT.
Searching around a bit, it looks like this bit is checked by
vmx/capabilities.h:cpu_has_vmx_ept_execute_only(), which is used only
in kvm/vmx/vmx.c:vmx_hardware_setup(), passed as the has_exec_only
argument to kvm_mmu_set_ept_masks(), which uses it to set
shadow_present_mask.
I'm not sure if this actually matters for this change(?), but thought
it was at least worth surfacing for others to consider.
Signed-off-by: Jon Kohler <jon@nutanix.com>
---
arch/x86/include/asm/vmx.h | 6 ++++--
arch/x86/kvm/mmu/spte.h | 13 +++++++------
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 84c5be416f5c..961d37e108b5 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -541,7 +541,8 @@ enum vmcs_field {
#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63)
#define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \
VMX_EPT_WRITABLE_MASK | \
- VMX_EPT_EXECUTABLE_MASK)
+ VMX_EPT_EXECUTABLE_MASK | \
+ VMX_EPT_USER_EXECUTABLE_MASK)
#define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT)
static inline u8 vmx_eptp_page_walk_level(u64 eptp)
@@ -558,7 +559,8 @@ static inline u8 vmx_eptp_page_walk_level(u64 eptp)
/* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */
#define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \
- VMX_EPT_EXECUTABLE_MASK)
+ VMX_EPT_EXECUTABLE_MASK | \
+ VMX_EPT_USER_EXECUTABLE_MASK)
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index dc2f0dc9c46e..1f7b388a56aa 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -98,11 +98,11 @@ static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
#undef SHADOW_ACC_TRACK_SAVED_MASK
/*
- * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
+ * Due to limited space in PTEs, the MMIO generation is a 18 bit subset of
* the memslots generation and is derived as follows:
*
- * Bits 0-7 of the MMIO generation are propagated to spte bits 3-10
- * Bits 8-18 of the MMIO generation are propagated to spte bits 52-62
+ * Bits 0-6 of the MMIO generation are propagated to spte bits 3-9
+ * Bits 7-17 of the MMIO generation are propagated to spte bits 52-62
*
* The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
* the MMIO generation number, as doing so would require stealing a bit from
@@ -113,7 +113,7 @@ static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
*/
#define MMIO_SPTE_GEN_LOW_START 3
-#define MMIO_SPTE_GEN_LOW_END 10
+#define MMIO_SPTE_GEN_LOW_END 9
#define MMIO_SPTE_GEN_HIGH_START 52
#define MMIO_SPTE_GEN_HIGH_END 62
@@ -135,7 +135,8 @@ static_assert(!(SPTE_MMU_PRESENT_MASK &
* and so they're off-limits for generation; additional checks ensure the mask
* doesn't overlap legal PA bits), and bit 63 (carved out for future usage).
*/
-#define SPTE_MMIO_ALLOWED_MASK (BIT_ULL(63) | GENMASK_ULL(51, 12) | GENMASK_ULL(2, 0))
+#define SPTE_MMIO_ALLOWED_MASK (BIT_ULL(63) | GENMASK_ULL(51, 12) | \
+ BIT_ULL(10) | GENMASK_ULL(2, 0))
static_assert(!(SPTE_MMIO_ALLOWED_MASK &
(SPTE_MMU_PRESENT_MASK | MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK)));
@@ -143,7 +144,7 @@ static_assert(!(SPTE_MMIO_ALLOWED_MASK &
#define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)
/* remember to adjust the comment above as well if you change these */
-static_assert(MMIO_SPTE_GEN_LOW_BITS == 8 && MMIO_SPTE_GEN_HIGH_BITS == 11);
+static_assert(MMIO_SPTE_GEN_LOW_BITS == 7 && MMIO_SPTE_GEN_HIGH_BITS == 11);
#define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0)
#define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
--
2.43.0
Please be more precise with the shortlogs. "Understand MBEC" is extremely vague. On Thu, Mar 13, 2025, Jon Kohler wrote: > Adjust the SPTE_MMIO_ALLOWED_MASK and associated values to make these > masks aware of PTE Bit 10, to be used by Intel MBEC. Same thing here. "aware of PTE bit 10" doesn't describe the change in a way that allows for quick review of the patch. E.g. KVM: x86/mmu: Exclude EPT MBEC's user-executable bit from the MMIO generation The changelogs also need to explain *why*. If you actually tried to write out justification for why KVM can't use bit 10 for the MMIO generation, then unless you start making stuff up (or Chao and I are missing something), you'll come to same conclusion that Chao and I came to: this patch is unnecessary.
> On May 12, 2025, at 3:37 PM, Sean Christopherson <seanjc@google.com> wrote:
>
> Please be more precise with the shortlogs. "Understand MBEC" is extremely vague.
Ack/Done - fixed across the board in v1 series, though this one in v1 is still
a bit skinny. I have narrowed down this specific patch to *just* focus on this
MMIO generation mask bit, because MBEC can be used as a present bit according to
the SDM, so it seems like it is necessary to carve out of the MMIO mask.
Looking back on this, it seems (correct me if I’m wrong) that this is in line with
the original work on commit 8bad460 ("KVM: x86/mmu: Add sanity check that MMIO
SPTE mask doesn't overlap gen")
> On Thu, Mar 13, 2025, Jon Kohler wrote:
>> Adjust the SPTE_MMIO_ALLOWED_MASK and associated values to make these
>> masks aware of PTE Bit 10, to be used by Intel MBEC.
>
> Same thing here. "aware of PTE bit 10" doesn't describe the change in a way that
> allows for quick review of the patch. E.g.
>
> KVM: x86/mmu: Exclude EPT MBEC's user-executable bit from the MMIO generation
>
> The changelogs also need to explain *why*. If you actually tried to write out
> justification for why KVM can't use bit 10 for the MMIO generation, then unless
> you start making stuff up (or Chao and I are missing something), you'll come to
> same conclusion that Chao and I came to: this patch is unnecessary.
> On May 12, 2025, at 3:37 PM, Sean Christopherson <seanjc@google.com> wrote: > > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > Please be more precise with the shortlogs. "Understand MBEC" is extremely vague. Ack, thanks for the feedback. I’ll tune it up across the board > > On Thu, Mar 13, 2025, Jon Kohler wrote: >> Adjust the SPTE_MMIO_ALLOWED_MASK and associated values to make these >> masks aware of PTE Bit 10, to be used by Intel MBEC. > > Same thing here. "aware of PTE bit 10" doesn't describe the change in a way that > allows for quick review of the patch. E.g. > > KVM: x86/mmu: Exclude EPT MBEC's user-executable bit from the MMIO generation > > The changelogs also need to explain *why*. If you actually tried to write out > justification for why KVM can't use bit 10 for the MMIO generation, then unless > you start making stuff up (or Chao and I are missing something), you'll come to > same conclusion that Chao and I came to: this patch is unnecessary. I’ll take a swing at it again, IIRC I couldn’t get it working without this, but I’ll page that back in and figure it out
On Thu, Mar 13, 2025 at 01:36:52PM -0700, Jon Kohler wrote:
>Adjust the SPTE_MMIO_ALLOWED_MASK and associated values to make these
>masks aware of PTE Bit 10, to be used by Intel MBEC.
>
>Intel SDM 30.3.3.1 EPT Misconfigurations states:
> An EPT misconfiguration occurs if translation of a guest-physical
> address encounters an EPT paging-structure entry that meets any of
> the following conditions:
> - Bit 0 of the entry is clear (indicating that data reads are not
> allowed) and any of the following hold:
> — Bit 1 is set (indicating that data writes are allowed).
> — The processor does not support execute-only translations and
> either of the following hold:
> - Bit 2 is set (indicating that instruction fetches are allowed)
> Note: If the “mode-based execute control for EPT” VM-execution
> control is 1, setting bit 2 indicates that instruction fetches
> are allowed from supervisor-mode linear addresses.
> - The “mode-based execute control for EPT” VM-execution control
> is 1 and bit 10 is set (indicating that instruction fetches
> are allowed from user-mode linear addresses).
>
>For LKML Review:
>SDM 30.3.3.1 also states that "Software should read the VMX capability
>MSR IA32_VMX_EPT_VPID_CAP to determine whether execute-only
>translations are supported (see Appendix A.10)." A.10 indicates that
>this is specified by bit 0; if bit 0 is 1, then the processor supports
>execute-only transactions by EPT.
>
>Searching around a bit, it looks like this bit is checked by
>vmx/capabilities.h:cpu_has_vmx_ept_execute_only(), which is used only
>in kvm/vmx/vmx.c:vmx_hardware_setup(), passed as the has_exec_only
>argument to kvm_mmu_set_ept_masks(), which uses it to set
>shadow_present_mask.
>
>I'm not sure if this actually matters for this change(?), but thought
>it was at least worth surfacing for others to consider.
KVM needs to emulate the hardware behavior when walking guest EPT to report
EPT misconfigurations/violations accurately. IMO, below functions should be
modified:
FNAME(is_present_gpte)
FNAME(is_bad_mt_xwr)
>
>Signed-off-by: Jon Kohler <jon@nutanix.com>
>
>---
> arch/x86/include/asm/vmx.h | 6 ++++--
> arch/x86/kvm/mmu/spte.h | 13 +++++++------
> 2 files changed, 11 insertions(+), 8 deletions(-)
>
>diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>index 84c5be416f5c..961d37e108b5 100644
>--- a/arch/x86/include/asm/vmx.h
>+++ b/arch/x86/include/asm/vmx.h
>@@ -541,7 +541,8 @@ enum vmcs_field {
> #define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63)
> #define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \
> VMX_EPT_WRITABLE_MASK | \
>- VMX_EPT_EXECUTABLE_MASK)
>+ VMX_EPT_EXECUTABLE_MASK | \
>+ VMX_EPT_USER_EXECUTABLE_MASK)
> #define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT)
>
> static inline u8 vmx_eptp_page_walk_level(u64 eptp)
>@@ -558,7 +559,8 @@ static inline u8 vmx_eptp_page_walk_level(u64 eptp)
>
> /* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */
> #define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \
>- VMX_EPT_EXECUTABLE_MASK)
>+ VMX_EPT_EXECUTABLE_MASK | \
>+ VMX_EPT_USER_EXECUTABLE_MASK)
This change is not needed. whether MEBC is enabled doesn't make
VMX_EPT_WRITABLE_MASK | VMX_EPT_EXECUTABLE_MASK a valid entry for EPT.
© 2016 - 2026 Red Hat, Inc.