[RFC PATCH 09/18] KVM: x86/mmu: Extend access bitfield in kvm_mmu_page_role

Jon Kohler posted 18 patches 9 months, 1 week ago
[RFC PATCH 09/18] KVM: x86/mmu: Extend access bitfield in kvm_mmu_page_role
Posted by Jon Kohler 9 months, 1 week ago
From: Mickaël Salaün <mic@digikod.net>

Extend access bitfield from 3 to 4 in kvm_mmu_page_role, where the 4th
bit will be used to track user executable pages with Intel MBEC.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Co-developed-by: Jon Kohler <jon@nutanix.com>
Signed-off-by: Jon Kohler <jon@nutanix.com>
Co-developed-by: Sergey Dyasli <sergey.dyasli@nutanix.com>
Signed-off-by: Sergey Dyasli <sergey.dyasli@nutanix.com>

---
 arch/x86/include/asm/kvm_host.h | 10 +++++-----
 arch/x86/kvm/mmu/mmu.c          |  2 +-
 arch/x86/kvm/mmu/mmutrace.h     |  8 +++++++-
 arch/x86/kvm/mmu/spte.h         |  4 +++-
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 192233eb557a..e8193de802a7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -312,10 +312,10 @@ struct kvm_kernel_irq_routing_entry;
  * the number of unique SPs that can theoretically be created is 2^n, where n
  * is the number of bits that are used to compute the role.
  *
- * But, even though there are 19 bits in the mask below, not all combinations
+ * But, even though there are 20 bits in the mask below, not all combinations
  * of modes and flags are possible:
  *
- *   - invalid shadow pages are not accounted, so the bits are effectively 18
+ *   - invalid shadow pages are not accounted, so the bits are effectively 19
  *
  *   - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
  *     execonly and ad_disabled are only used for nested EPT which has
@@ -330,7 +330,7 @@ struct kvm_kernel_irq_routing_entry;
  *     cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
  *
  * Therefore, the maximum number of possible upper-level shadow pages for a
- * single gfn is a bit less than 2^13.
+ * single gfn is a bit less than 2^14.
  */
 union kvm_mmu_page_role {
 	u32 word;
@@ -339,7 +339,7 @@ union kvm_mmu_page_role {
 		unsigned has_4_byte_gpte:1;
 		unsigned quadrant:2;
 		unsigned direct:1;
-		unsigned access:3;
+		unsigned access:4;
 		unsigned invalid:1;
 		unsigned efer_nx:1;
 		unsigned cr0_wp:1;
@@ -348,7 +348,7 @@ union kvm_mmu_page_role {
 		unsigned ad_disabled:1;
 		unsigned guest_mode:1;
 		unsigned passthrough:1;
-		unsigned :5;
+		unsigned:4;
 
 		/*
 		 * This is left at the top of the word so that
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8e853a5fc867..791413b93589 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1915,7 +1915,7 @@ static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	 */
 	const union kvm_mmu_page_role sync_role_ign = {
 		.level = 0xf,
-		.access = 0x7,
+		.access = 0xf,
 		.quadrant = 0x3,
 		.passthrough = 0x1,
 	};
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index f35a830ce469..2511fe64ca01 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -22,10 +22,16 @@
 	__entry->root_count = sp->root_count;		\
 	__entry->unsync = sp->unsync;
 
+/*
+ * X == ACC_EXEC_MASK: executable without guest_exec_control and only
+ *                     supervisor execute with guest exec control
+ * x == ACC_USER_EXEC_MASK: user execute with guest exec control
+ */
 #define KVM_MMU_PAGE_PRINTK() ({				        \
 	const char *saved_ptr = trace_seq_buffer_ptr(p);		\
 	static const char *access_str[] = {			        \
-		"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"  \
+		"----", "---X", "-w--", "-w-X", "--u-", "--uX", "-wu-", "-wuX", \
+		"x---", "x--X", "xw--", "xw-X", "xu--", "x-uX", "xwu-", "xwuX"	\
 	};							        \
 	union kvm_mmu_page_role role;				        \
 								        \
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 71d6fe28fafc..d9e22133b6d0 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -45,7 +45,9 @@ static_assert(SPTE_TDP_AD_ENABLED == 0);
 #define ACC_EXEC_MASK    1
 #define ACC_WRITE_MASK   PT_WRITABLE_MASK
 #define ACC_USER_MASK    PT_USER_MASK
-#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+#define ACC_USER_EXEC_MASK (1ULL << 3)
+#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | \
+			  ACC_USER_EXEC_MASK)
 
 /* The mask for the R/X bits in EPT PTEs */
 #define SPTE_EPT_READABLE_MASK			0x1ull
-- 
2.43.0

Re: [RFC PATCH 09/18] KVM: x86/mmu: Extend access bitfield in kvm_mmu_page_role
Posted by Sean Christopherson 7 months, 1 week ago
On Thu, Mar 13, 2025, Jon Kohler wrote:
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 71d6fe28fafc..d9e22133b6d0 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -45,7 +45,9 @@ static_assert(SPTE_TDP_AD_ENABLED == 0);
>  #define ACC_EXEC_MASK    1
>  #define ACC_WRITE_MASK   PT_WRITABLE_MASK
>  #define ACC_USER_MASK    PT_USER_MASK
> -#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
> +#define ACC_USER_EXEC_MASK (1ULL << 3)
> +#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | \
> +			  ACC_USER_EXEC_MASK)

This is very subtly a massive change, and I'm not convinced it's one we want to
make.  All usage in the non-nested TDP flows is arguably wrong, because KVM should
never enable MBEC when using non-nested TDP.

And the use in kvm_calc_shadow_ept_root_page_role() is wrong, because the root
page role shouldn't include ACC_USER_EXEC_MASK if the associated VMCS doesn't
have MBEC.  Ditto for the use in kvm_calc_cpu_role().

So I'm pretty sure the only bit of this change that is desirable/correct is the
usage in kvm_mmu_page_get_access().  (And I guess maybe trace_mark_mmio_spte()?)

Off the cuff, I don't know what the best approach is.  One thought would be to
prep for adding ACC_USER_EXEC_MASK to ACC_ALL by introducing ACC_RWX and using
that where KVM really just wants to set RWX permissions.  That would free up
ACC_ALL for the few cases where KVM really truly wants to capture all access bits.
Re: [RFC PATCH 09/18] KVM: x86/mmu: Extend access bitfield in kvm_mmu_page_role
Posted by Jon Kohler 7 months, 1 week ago

> On May 12, 2025, at 2:32 PM, Sean Christopherson <seanjc@google.com> wrote:
> 
> !-------------------------------------------------------------------|
>  CAUTION: External Email
> 
> |-------------------------------------------------------------------!
> 
> On Thu, Mar 13, 2025, Jon Kohler wrote:
>> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
>> index 71d6fe28fafc..d9e22133b6d0 100644
>> --- a/arch/x86/kvm/mmu/spte.h
>> +++ b/arch/x86/kvm/mmu/spte.h
>> @@ -45,7 +45,9 @@ static_assert(SPTE_TDP_AD_ENABLED == 0);
>> #define ACC_EXEC_MASK    1
>> #define ACC_WRITE_MASK   PT_WRITABLE_MASK
>> #define ACC_USER_MASK    PT_USER_MASK
>> -#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
>> +#define ACC_USER_EXEC_MASK (1ULL << 3)
>> +#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | \
>> +  ACC_USER_EXEC_MASK)
> 
> This is very subtly a massive change, and I'm not convinced it's one we want to
> make.  All usage in the non-nested TDP flows is arguably wrong, because KVM should
> never enable MBEC when using non-nested TDP.
> 
> And the use in kvm_calc_shadow_ept_root_page_role() is wrong, because the root
> page role shouldn't include ACC_USER_EXEC_MASK if the associated VMCS doesn't
> have MBEC.  Ditto for the use in kvm_calc_cpu_role().
> 
> So I'm pretty sure the only bit of this change that is desirable/correct is the
> usage in kvm_mmu_page_get_access().  (And I guess maybe trace_mark_mmio_spte()?)
> 
> Off the cuff, I don't know what the best approach is.  One thought would be to
> prep for adding ACC_USER_EXEC_MASK to ACC_ALL by introducing ACC_RWX and using
> that where KVM really just wants to set RWX permissions.  That would free up
> ACC_ALL for the few cases where KVM really truly wants to capture all access bits.

At first blush, I like this ACC_RWX idea. I’ll chew on that and see what
trouble I can get in.