[PATCH v13 057/113] KVM: TDX: MTRR: implement get_mt_mask() for TDX

Posted by isaku.yamahata@intel.com 2 years, 11 months ago
From: Isaku Yamahata <isaku.yamahata@intel.com>

Because TDX virtualizes cpuid[0x1].EDX[MTRR: bit 12] as a fixed 1, the
guest TD thinks MTRR is supported.  Although TDX supports only WB for
private GPA, it's desirable to support MTRR for shared GPA.  As guest
accesses to MTRR MSRs cause #VE and KVM/x86 tracks the values of the MTRR
MSRs, the remaining part is to implement the get_mt_mask method for TDX
for shared GPA.

Pass the shared bit from the KVM fault handler to the get_mt_mask method
so that it can determine whether the gfn is shared or private.  Implement
get_mt_mask() following the VMX case for shared GPA and return WB for
private GPA.  The existing vmx_get_mt_mask() can't be used directly
because the CPU state (CR0.CD) is protected.  The GFN passed to
kvm_mtrr_check_gfn_range_consistency() should include the shared bit.

Suggested-by: Kai Huang <kai.huang@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
Changes from v11 to v12
- Make a common function for VMX and TDX
- Pass the shared bit from the KVM fault handler to the get_mt_mask method
- Updated the commit message
---
 arch/x86/kvm/mmu/mmu.c     |  7 ++++++-
 arch/x86/kvm/mmu/spte.c    |  5 +++--
 arch/x86/kvm/mmu/spte.h    |  2 +-
 arch/x86/kvm/vmx/common.h  |  2 ++
 arch/x86/kvm/vmx/main.c    | 11 ++++++++++-
 arch/x86/kvm/vmx/tdx.c     | 17 +++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     |  5 +++--
 arch/x86/kvm/vmx/x86_ops.h |  2 ++
 8 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6074aa09cd87..fb858594cfec 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4569,7 +4569,12 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
 		for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
 			int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
-			gfn_t base = gfn_round_for_level(fault->gfn,
+			/*
+			 * kvm_mtrr_check_gfn_range_consistency() requires a
+			 * gfn that includes the shared bit.  fault->gfn has
+			 * the shared bit masked out, so it can't be used here.
+			 */
+			gfn_t base = gfn_round_for_level(gpa_to_gfn(fault->addr),
 							 fault->max_level);
 
 			if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num))
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 180907ef26c7..7adb0d00ec4b 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -137,13 +137,14 @@ bool spte_has_volatile_bits(u64 spte)
 
 bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	       const struct kvm_memory_slot *slot,
-	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
+	       unsigned int pte_access, gfn_t gfn_including_shared, kvm_pfn_t pfn,
 	       u64 old_spte, bool prefetch, bool can_unsync,
 	       bool host_writable, u64 *new_spte)
 {
 	int level = sp->role.level;
 	u64 spte = SPTE_MMU_PRESENT_MASK;
 	bool wrprot = false;
+	gfn_t gfn = gfn_including_shared & ~kvm_gfn_shared_mask(vcpu->kvm);
 
 	WARN_ON_ONCE(!pte_access && !shadow_present_mask);
 
@@ -191,7 +192,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		spte |= PT_PAGE_SIZE_MASK;
 
 	if (shadow_memtype_mask)
-		spte |= static_call(kvm_x86_get_mt_mask)(vcpu, gfn,
+		spte |= static_call(kvm_x86_get_mt_mask)(vcpu, gfn_including_shared,
 							 kvm_is_mmio_pfn(pfn));
 	if (host_writable)
 		spte |= shadow_host_writable_mask;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 41973fe6bc22..62280c4b8c81 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -481,7 +481,7 @@ bool spte_has_volatile_bits(u64 spte);
 
 bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	       const struct kvm_memory_slot *slot,
-	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
+	       unsigned int pte_access, gfn_t gfn_including_shared, kvm_pfn_t pfn,
 	       u64 old_spte, bool prefetch, bool can_unsync,
 	       bool host_writable, u64 *new_spte);
 u64 make_huge_page_split_spte(struct kvm *kvm, u64 huge_spte,
diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
index 235908f3e044..422b24af7fc1 100644
--- a/arch/x86/kvm/vmx/common.h
+++ b/arch/x86/kvm/vmx/common.h
@@ -6,6 +6,8 @@
 
 #include "mmu.h"
 
+u8 __vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio, bool check_cr0_cd);
+
 static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
 					     unsigned long exit_qualification)
 {
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 902b57506291..55001b34e1f0 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -3,6 +3,7 @@
 
 #include "x86_ops.h"
 #include "mmu.h"
+#include "common.h"
 #include "vmx.h"
 #include "nested.h"
 #include "mmu.h"
@@ -228,6 +229,14 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
 }
 
+static u8 vt_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	if (is_td_vcpu(vcpu))
+		return tdx_get_mt_mask(vcpu, gfn, is_mmio);
+
+	return __vmx_get_mt_mask(vcpu, gfn, is_mmio, true);
+}
+
 static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
 {
 	if (!is_td(kvm))
@@ -348,7 +357,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.set_identity_map_addr = vmx_set_identity_map_addr,
-	.get_mt_mask = vmx_get_mt_mask,
+	.get_mt_mask = vt_get_mt_mask,
 
 	.get_exit_info = vmx_get_exit_info,
 
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 6ab7580de69c..b7b4ab60f96d 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -5,6 +5,7 @@
 
 #include "capabilities.h"
 #include "x86_ops.h"
+#include "common.h"
 #include "tdx.h"
 #include "vmx.h"
 #include "x86.h"
@@ -350,6 +351,22 @@ int tdx_vm_init(struct kvm *kvm)
 	return 0;
 }
 
+u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	/* TDX private GPA is always WB. */
+	if (!(gfn & kvm_gfn_shared_mask(vcpu->kvm))) {
+		/* MMIO is only for shared GPA. */
+		WARN_ON_ONCE(is_mmio);
+		return  MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
+	}
+
+	/* Drop shared bit as MTRR doesn't know about shared bit. */
+	gfn = kvm_gfn_to_private(vcpu->kvm, gfn);
+
+	/* As TDX enforces CR0.CD to 0, pass check_cr0_cd = false. */
+	return __vmx_get_mt_mask(vcpu, gfn, is_mmio, false);
+}
+
 int tdx_vcpu_create(struct kvm_vcpu *vcpu)
 {
 	/*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 23321b2208ae..b8d8f7fbeb69 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7568,7 +7568,8 @@ int vmx_vm_init(struct kvm *kvm)
 	return 0;
 }
 
-u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+u8 __vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio,
+		     bool check_cr0_cd)
 {
 	u8 cache;
 
@@ -7596,7 +7597,7 @@ u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
 		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
 
-	if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
+	if (check_cr0_cd && kvm_read_cr0(vcpu) & X86_CR0_CD) {
 		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
 			cache = MTRR_TYPE_WRBACK;
 		else
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index b73007586d8b..eba10dabc45f 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -155,6 +155,7 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
 int tdx_vcpu_create(struct kvm_vcpu *vcpu);
 void tdx_vcpu_free(struct kvm_vcpu *vcpu);
 void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
+u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
 
@@ -180,6 +181,7 @@ static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOP
 static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
 static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
 static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
+static inline u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; }
 
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
 
-- 
2.25.1
Re: [PATCH v13 057/113] KVM: TDX: MTRR: implement get_mt_mask() for TDX
Posted by Huang, Kai 2 years, 11 months ago
On Sun, 2023-03-12 at 10:56 -0700, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> Because TDX virtualizes cpuid[0x1].EDX[MTRR: bit 12] as a fixed 1, the
> guest TD thinks MTRR is supported.  Although TDX supports only WB for
> private GPA, it's desirable to support MTRR for shared GPA.  As guest
> accesses to MTRR MSRs cause #VE and KVM/x86 tracks the values of the MTRR
> MSRs, the remaining part is to implement the get_mt_mask method for TDX
> for shared GPA.
> 
> Pass the shared bit from the KVM fault handler to the get_mt_mask method
> so that it can determine whether the gfn is shared or private.  
> 

I think we have an Xarray to query whether a given GFN is shared or private?
Can we use that?
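
E.g., assuming a kvm_mem_is_private()-style helper backed by that xarray
were usable at this point, the check could be (hypothetical sketch, not
the posted code):

	/* Query the attributes xarray instead of a shared bit in the gfn. */
	if (kvm_mem_is_private(vcpu->kvm, gfn))
		return MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;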

> Implement get_mt_mask()
> following the VMX case for shared GPA and return WB for private GPA.
> The existing vmx_get_mt_mask() can't be used directly because the CPU
> state (CR0.CD) is protected.  The GFN passed to
> kvm_mtrr_check_gfn_range_consistency() should include the shared bit.
> 
> Suggested-by: Kai Huang <kai.huang@intel.com>

I am not sure what was suggested by me?

I thought what I suggested was that we should have a dedicated patch to
handle MTRR for TDX, putting all related things together.

> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> Changes from v11 to v12
> - Make a common function for VMX and TDX
> - Pass the shared bit from the KVM fault handler to the get_mt_mask method
> - Updated the commit message
> ---
>  arch/x86/kvm/mmu/mmu.c     |  7 ++++++-
>  arch/x86/kvm/mmu/spte.c    |  5 +++--
>  arch/x86/kvm/mmu/spte.h    |  2 +-
>  arch/x86/kvm/vmx/common.h  |  2 ++
>  arch/x86/kvm/vmx/main.c    | 11 ++++++++++-
>  arch/x86/kvm/vmx/tdx.c     | 17 +++++++++++++++++
>  arch/x86/kvm/vmx/vmx.c     |  5 +++--
>  arch/x86/kvm/vmx/x86_ops.h |  2 ++
>  8 files changed, 44 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 6074aa09cd87..fb858594cfec 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4569,7 +4569,12 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  	if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
>  		for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
>  			int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
> -			gfn_t base = gfn_round_for_level(fault->gfn,
> +			/*
> +			 * kvm_mtrr_check_gfn_range_consistency() requires a
> +			 * gfn that includes the shared bit.  
> 

Why? MTRR MSRs should always contain the true GFN without the shared bit, correct?

Then why does kvm_mtrr_check_gfn_range_consistency() need the shared bit?

> fault->gfn has
> +			 * the shared bit masked out, so it can't be used here.
> +			 */
> +			gfn_t base = gfn_round_for_level(gpa_to_gfn(fault->addr),
>  							 fault->max_level);
>  
>  			if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num))
> diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
> index 180907ef26c7..7adb0d00ec4b 100644
> --- a/arch/x86/kvm/mmu/spte.c
> +++ b/arch/x86/kvm/mmu/spte.c
> @@ -137,13 +137,14 @@ bool spte_has_volatile_bits(u64 spte)
>  
>  bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>  	       const struct kvm_memory_slot *slot,
> -	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
> +	       unsigned int pte_access, gfn_t gfn_including_shared, kvm_pfn_t pfn,

IMHO 'gfn_including_shared' is ugly, especially changing from 'gfn' in _THIS_
particular patch.

>  	       u64 old_spte, bool prefetch, bool can_unsync,
>  	       bool host_writable, u64 *new_spte)
>  {
>  	int level = sp->role.level;
>  	u64 spte = SPTE_MMU_PRESENT_MASK;
>  	bool wrprot = false;
> +	gfn_t gfn = gfn_including_shared & ~kvm_gfn_shared_mask(vcpu->kvm);
>  
>  	WARN_ON_ONCE(!pte_access && !shadow_present_mask);
>  
> @@ -191,7 +192,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>  		spte |= PT_PAGE_SIZE_MASK;
>  
>  	if (shadow_memtype_mask)
> -		spte |= static_call(kvm_x86_get_mt_mask)(vcpu, gfn,
> +		spte |= static_call(kvm_x86_get_mt_mask)(vcpu, gfn_including_shared,
>  							 kvm_is_mmio_pfn(pfn));
>  	if (host_writable)
>  		spte |= shadow_host_writable_mask;
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 41973fe6bc22..62280c4b8c81 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -481,7 +481,7 @@ bool spte_has_volatile_bits(u64 spte);
>  
>  bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>  	       const struct kvm_memory_slot *slot,
> -	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
> +	       unsigned int pte_access, gfn_t gfn_including_shared, kvm_pfn_t pfn,
>  	       u64 old_spte, bool prefetch, bool can_unsync,
>  	       bool host_writable, u64 *new_spte);
>  u64 make_huge_page_split_spte(struct kvm *kvm, u64 huge_spte,
> diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
> index 235908f3e044..422b24af7fc1 100644
> --- a/arch/x86/kvm/vmx/common.h
> +++ b/arch/x86/kvm/vmx/common.h
> @@ -6,6 +6,8 @@
>  
>  #include "mmu.h"
>  
> +u8 __vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio, bool check_cr0_cd);
> +
>  static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
>  					     unsigned long exit_qualification)
>  {
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index 902b57506291..55001b34e1f0 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -3,6 +3,7 @@
>  
>  #include "x86_ops.h"
>  #include "mmu.h"
> +#include "common.h"
>  #include "vmx.h"
>  #include "nested.h"
>  #include "mmu.h"
> @@ -228,6 +229,14 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
>  	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
>  }
>  
> +static u8 vt_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> +{
> +	if (is_td_vcpu(vcpu))
> +		return tdx_get_mt_mask(vcpu, gfn, is_mmio);
> +
> +	return __vmx_get_mt_mask(vcpu, gfn, is_mmio, true);
> +}
> +
>  static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
>  {
>  	if (!is_td(kvm))
> @@ -348,7 +357,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>  
>  	.set_tss_addr = vmx_set_tss_addr,
>  	.set_identity_map_addr = vmx_set_identity_map_addr,
> -	.get_mt_mask = vmx_get_mt_mask,
> +	.get_mt_mask = vt_get_mt_mask,
>  
>  	.get_exit_info = vmx_get_exit_info,
>  
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 6ab7580de69c..b7b4ab60f96d 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -5,6 +5,7 @@
>  
>  #include "capabilities.h"
>  #include "x86_ops.h"
> +#include "common.h"
>  #include "tdx.h"
>  #include "vmx.h"
>  #include "x86.h"
> @@ -350,6 +351,22 @@ int tdx_vm_init(struct kvm *kvm)
>  	return 0;
>  }
>  
> +u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> +{
> +	/* TDX private GPA is always WB. */
> +	if (!(gfn & kvm_gfn_shared_mask(vcpu->kvm))) {
> +		/* MMIO is only for shared GPA. */
> +		WARN_ON_ONCE(is_mmio);
> +		return  MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
> +	}
> +
> +	/* Drop shared bit as MTRR doesn't know about shared bit. */
> +	gfn = kvm_gfn_to_private(vcpu->kvm, gfn);
> +
> +	/* As TDX enforces CR0.CD to 0, pass check_cr0_cd = false. */
> +	return __vmx_get_mt_mask(vcpu, gfn, is_mmio, false);
> +}


Do you know whether there's any use case of non-coherent device
assignment to a TDX guest?

IMHO, we should just disallow TDX guests from supporting non-coherent
device assignment, so that we can just return WB for both private and
shared.

If we support non-coherent device assignment, then if the guest sets
private memory to a non-WB memory type, it believes the memory type is
non-WB, but in fact TDX always maps private memory as WB.

Will this be a problem, i.e. if an assigned device can DMA to private
memory directly in the future?

> +
>  int tdx_vcpu_create(struct kvm_vcpu *vcpu)
>  {
>  	/*
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 23321b2208ae..b8d8f7fbeb69 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7568,7 +7568,8 @@ int vmx_vm_init(struct kvm *kvm)
>  	return 0;
>  }
>  
> -u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> +u8 __vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio,
> +		     bool check_cr0_cd)
>  {
>  	u8 cache;
>  
> @@ -7596,7 +7597,7 @@ u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
>  	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
>  		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
>  
> -	if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
> +	if (check_cr0_cd && kvm_read_cr0(vcpu) & X86_CR0_CD) {
>  		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
>  			cache = MTRR_TYPE_WRBACK;
>  		else

Re: [PATCH v13 057/113] KVM: TDX: MTRR: implement get_mt_mask() for TDX
Posted by Isaku Yamahata 2 years, 10 months ago
On Thu, Mar 16, 2023 at 10:38:02AM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> On Sun, 2023-03-12 at 10:56 -0700, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > 
> > Because TDX virtualizes cpuid[0x1].EDX[MTRR: bit 12] as a fixed 1, the
> > guest TD thinks MTRR is supported.  Although TDX supports only WB for
> > private GPA, it's desirable to support MTRR for shared GPA.  As guest
> > accesses to MTRR MSRs cause #VE and KVM/x86 tracks the values of the MTRR
> > MSRs, the remaining part is to implement the get_mt_mask method for TDX
> > for shared GPA.
> > 
> > Pass the shared bit from the KVM fault handler to the get_mt_mask method
> > so that it can determine whether the gfn is shared or private.  
> > 
> 
> I think we have an Xarray to query whether a given GFN is shared or private?
> Can we use that?
> 
> > Implement get_mt_mask()
> > following the VMX case for shared GPA and return WB for private GPA.
> > The existing vmx_get_mt_mask() can't be used directly because the CPU
> > state (CR0.CD) is protected.  The GFN passed to
> > kvm_mtrr_check_gfn_range_consistency() should include the shared bit.
> > 
> > Suggested-by: Kai Huang <kai.huang@intel.com>
> 
> I am not sure what was suggested by me?
> 
> I thought what I suggested was that we should have a dedicated patch to
> handle MTRR for TDX, putting all related things together.

Sure. After looking at the specs, my conclusion is that a guest TD can't
update MTRRs reliably, because an MTRR update requires disabling the
cache (CR0.CD=1), a cache flush, and a TLB flush (SDM 3a 12.11.7: MTRR
maintenance programming interface).  Linux implements its MTRR update
logic accordingly.
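
For reference, the SDM sequence is roughly the following (a schematic
sketch using kernel-style helpers; the CR0.CD step is the one a TD
cannot perform, as explained below):

	/* SDM 3a 12.11.7 MTRR maintenance, schematically: */
	write_cr0(read_cr0() | X86_CR0_CD);	/* disable caching; #GP in a TD */
	wbinvd();				/* flush caches */
	__flush_tlb_all();			/* flush TLBs */
	/* ... wrmsr() to the MTRR MSRs ... */
	wbinvd();
	__flush_tlb_all();
	write_cr0(read_cr0() & ~X86_CR0_CD);	/* re-enable caching */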

While TDX enforces CR0.CD=0, trying to set CR0.CD=1 results in #GP.  At
the same time, it reports that MTRR is available via cpuid.  So I came up
with the following.

- MTRRCap(RO): report no feature available
  SMRR=0: SMRR interface unsupported
  WC=0: write combining unsupported
  FIX=0: Fixed range registers unsupported
  VCNT=0: number of variable range registers = 0

- MTRRDefType(R/W): Only writeback, even in the reset state.
  E=1: enable MTRR (E=0 means all memory is UC.)
  FE=0: disable fixed range MTRRs
  type: default memory type=writeback
  Accept writes of this value.  Other values result in #GP.

- Fixed range MTRR
  #GP, as fixed range MTRRs are reported as unsupported

- Variable range MTRRs
  #GP, as the number of variable range MTRRs is reported as zero
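
Concretely, the only MTRRDefType value accepted on write would be the
following encoding (the macro name here is just for illustration):

	/* E=1 (bit 11, MTRRs enabled), FE=0 (bit 12, fixed-range MTRRs
	 * disabled), default memory type = WB (0x06), i.e. 0x806. */
	#define TDX_MTRR_DEFTYPE_WB	((1 << 11) | MTRR_TYPE_WRBACK)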
 

> > +u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> > +{
> > +	/* TDX private GPA is always WB. */
> > +	if (!(gfn & kvm_gfn_shared_mask(vcpu->kvm))) {
> > +		/* MMIO is only for shared GPA. */
> > +		WARN_ON_ONCE(is_mmio);
> > +		return  MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
> > +	}
> > +
> > +	/* Drop shared bit as MTRR doesn't know about shared bit. */
> > +	gfn = kvm_gfn_to_private(vcpu->kvm, gfn);
> > +
> > +	/* As TDX enforces CR0.CD to 0, pass check_cr0_cd = false. */
> > +	return __vmx_get_mt_mask(vcpu, gfn, is_mmio, false);
> > +}
> 
> 
> Do you know whether there's any use case of non-coherent device
> assignment to a TDX guest?
> 
> IMHO, we should just disallow TDX guests from supporting non-coherent
> device assignment, so that we can just return WB for both private and
> shared.
> 
> If we support non-coherent device assignment, then if the guest sets
> private memory to a non-WB memory type, it believes the memory type is
> non-WB, but in fact TDX always maps private memory as WB.
> 
> Will this be a problem, i.e. if an assigned device can DMA to private
> memory directly in the future?

MTRR is a legacy feature and PAT replaced it.  We can rely on the guest to use PAT.
Here is the new patch for MTRR.

--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -229,6 +229,14 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
 }
 
+static u8 vt_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	if (is_td_vcpu(vcpu))
+		return tdx_get_mt_mask(vcpu, gfn, is_mmio);
+
+	return vmx_get_mt_mask(vcpu, gfn, is_mmio);
+}
+
 static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
 {
 	if (!is_td(kvm))
@@ -349,7 +357,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.set_identity_map_addr = vmx_set_identity_map_addr,
-	.get_mt_mask = vmx_get_mt_mask,
+	.get_mt_mask = vt_get_mt_mask,
 
 	.get_exit_info = vmx_get_exit_info,
 
diff -u b/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
--- b/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -347,6 +347,25 @@
 	return 0;
 }
 
+u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	/* TDX private GPA is always WB. */
+	if (!(gfn & kvm_gfn_shared_mask(vcpu->kvm))) {
+		/* MMIO is only for shared GPA. */
+		WARN_ON_ONCE(is_mmio);
+		return  MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
+	}
+
+	if (is_mmio)
+		return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
+
+	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
+		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
+
+	/* TDX enforces CR0.CD = 0 and KVM MTRR emulation enforces writeback. */
+	return MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
+}
+
 int tdx_vcpu_create(struct kvm_vcpu *vcpu)
 {
 	/*
@@ -1256,6 +1275,45 @@
 	return ret;
 }
 
+static int tdx_vcpu_init_mtrr(struct kvm_vcpu *vcpu)
+{
+	struct msr_data msr;
+	int ret;
+	int i;
+
+	/*
+	 * To avoid confusion with reporting VCNT = 0, explicitly disable
+	 * the variable-range registers.
+	 */
+	for (i = 0; i < KVM_NR_VAR_MTRR; i++) {
+		/* physmask */
+		msr = (struct msr_data) {
+			.host_initiated = true,
+			.index = 0x200 + 2 * i + 1,
+			.data = 0,	/* valid = 0 to disable. */
+		};
+		ret = kvm_set_msr_common(vcpu, &msr);
+		if (ret)
+			return -EINVAL;
+	}
+
+	/* Set MTRR to use writeback on reset. */
+	msr = (struct msr_data) {
+		.host_initiated = true,
+		.index = MSR_MTRRdefType,
+		/*
+		 * Set E(enable MTRR)=1, FE(enable fixed range MTRR)=0, default
+		 * type=writeback on reset to avoid UC.  Note E=0 means all
+		 * memory is UC.
+		 */
+		.data = (1 << 11) | MTRR_TYPE_WRBACK,
+	};
+	ret = kvm_set_msr_common(vcpu, &msr);
+	if (ret)
+		return -EINVAL;
+	return 0;
+}
+
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
 {
 	struct msr_data apic_base_msr;
@@ -1293,6 +1351,10 @@
 	if (kvm_set_apic_base(vcpu, &apic_base_msr))
 		return -EINVAL;
 
+	ret = tdx_vcpu_init_mtrr(vcpu);
+	if (ret)
+		return ret;
+
 	ret = tdx_td_vcpu_init(vcpu, (u64)cmd.data);
 	if (ret)
 		return ret;
@@ -1676,7 +1738,9 @@
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_ARCH_CAPABILITIES:
 	case MSR_IA32_POWER_CTL:
+	case MSR_MTRRcap:
 	case MSR_IA32_CR_PAT:
+	case MSR_MTRRdefType:
 	case MSR_IA32_TSC_DEADLINE:
 	case MSR_IA32_MISC_ENABLE:
 	case MSR_PLATFORM_INFO:
@@ -1718,16 +1782,47 @@
 
 int tdx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
-	if (tdx_has_emulated_msr(msr->index, false))
-		return kvm_get_msr_common(vcpu, msr);
-	return 1;
+	switch (msr->index) {
+	case MSR_MTRRcap:
+		/*
+		 * Override kvm_mtrr_get_msr() which hardcodes the value.
+		 * Report SMRR = 0, WC = 0, FIX = 0, VCNT = 0 to effectively
+		 * disable MTRRs.
+		 */
+		msr->data = 0;
+		return 0;
+	default:
+		if (tdx_has_emulated_msr(msr->index, false))
+			return kvm_get_msr_common(vcpu, msr);
+		return 1;
+	}
 }
 
 int tdx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
-	if (tdx_has_emulated_msr(msr->index, true))
+	switch (msr->index) {
+	case MSR_MTRRdefType:
+		/*
+		 * Allow only writeback for all memory.
+		 * Because fixed range MTRRs are reported as unsupported
+		 * and VCNT=0, enforce MTRRDefType.FE = 0 and don't care
+		 * about variable range MTRRs.  Only the default memory type matters.
+		 *
+		 * bit 11 E: MTRR enable/disable
+		 * bit 12 FE: Fixed-range MTRRs enable/disable
+		 * (E, FE) = (1, 1): enable MTRR and Fixed range MTRR
+		 * (E, FE) = (1, 0): enable MTRR, disable Fixed range MTRR
+		 * (E, FE) = (0, *): disable all MTRRs; all physical memory
+		 *                   is UC
+		 */
+		if (msr->data != ((1 << 11) | MTRR_TYPE_WRBACK))
+			return 1;
 		return kvm_set_msr_common(vcpu, msr);
-	return 1;
+	default:
+		if (tdx_has_emulated_msr(msr->index, true))
+			return kvm_set_msr_common(vcpu, msr);
+		return 1;
+	}
 }
 
 int tdx_dev_ioctl(void __user *argp)
unchanged:
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -152,6 +152,7 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
 int tdx_vcpu_create(struct kvm_vcpu *vcpu);
 void tdx_vcpu_free(struct kvm_vcpu *vcpu);
 void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
+u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
 
@@ -176,6 +177,7 @@ static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOP
 static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
 static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
 static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
+static inline u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; }
 
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
 



-- 
Isaku Yamahata <isaku.yamahata@gmail.com>
Re: [PATCH v13 057/113] KVM: TDX: MTRR: implement get_mt_mask() for TDX
Posted by Huang, Kai 2 years, 10 months ago
On Fri, 2023-03-24 at 18:12 -0700, Isaku Yamahata wrote:
> On Thu, Mar 16, 2023 at 10:38:02AM +0000,
> "Huang, Kai" <kai.huang@intel.com> wrote:
> 
> > On Sun, 2023-03-12 at 10:56 -0700, isaku.yamahata@intel.com wrote:
> > > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > > 
> > > Because TDX virtualizes cpuid[0x1].EDX[MTRR: bit 12] as a fixed 1, the
> > > guest TD thinks MTRR is supported.  Although TDX supports only WB for
> > > private GPA, it's desirable to support MTRR for shared GPA.  As guest
> > > accesses to MTRR MSRs cause #VE and KVM/x86 tracks the values of the MTRR
> > > MSRs, the remaining part is to implement the get_mt_mask method for TDX
> > > for shared GPA.
> > > 
> > > Pass the shared bit from the KVM fault handler to the get_mt_mask method
> > > so that it can determine whether the gfn is shared or private.  
> > > 
> > 
> > I think we have an Xarray to query whether a given GFN is shared or private?
> > Can we use that?
> > 
> > > Implement get_mt_mask()
> > > following the VMX case for shared GPA and return WB for private GPA.
> > > The existing vmx_get_mt_mask() can't be used directly because the CPU
> > > state (CR0.CD) is protected.  The GFN passed to
> > > kvm_mtrr_check_gfn_range_consistency() should include the shared bit.
> > > 
> > > Suggested-by: Kai Huang <kai.huang@intel.com>
> > 
> > I am not sure what was suggested by me?
> > 
> > I thought what I suggested was that we should have a dedicated patch to
> > handle MTRR for TDX, putting all related things together.
> 
> Sure. After looking at the specs, my conclusion is that a guest TD can't
> update MTRRs reliably, because an MTRR update requires disabling the
> cache (CR0.CD=1), a cache flush, and a TLB flush (SDM 3a 12.11.7: MTRR
> maintenance programming interface).  Linux implements its MTRR update
> logic accordingly.
> 
> While TDX enforces CR0.CD=0, trying to set CR0.CD=1 results in #GP.  At
> the same time, it reports that MTRR is available via cpuid.  So I came up
> with the following.
> 
> - MTRRCap(RO): report no feature available
>   SMRR=0: SMRR interface unsupported
>   WC=0: write combining unsupported
>   FIX=0: Fixed range registers unsupported
>   VCNT=0: number of variable range registers = 0
> 
> - MTRRDefType(R/W): Only writeback, even in the reset state.
>   E=1: enable MTRR (E=0 means all memory is UC.)
>   FE=0: disable fixed range MTRRs
>   type: default memory type=writeback
>   Accept writes of this value.  Other values result in #GP.
> 
> - Fixed range MTRR
>   #GP, as fixed range MTRRs are reported as unsupported
> 
> - Variable range MTRRs
>   #GP, as the number of variable range MTRRs is reported as zero

Looks sane to me.

>  
> 
> > > +u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> > > +{
> > > +	/* TDX private GPA is always WB. */
> > > +	if (!(gfn & kvm_gfn_shared_mask(vcpu->kvm))) {
> > > +		/* MMIO is only for shared GPA. */
> > > +		WARN_ON_ONCE(is_mmio);
> > > +		return  MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
> > > +	}
> > > +
> > > +	/* Drop shared bit as MTRR doesn't know about shared bit. */
> > > +	gfn = kvm_gfn_to_private(vcpu->kvm, gfn);
> > > +
> > > +	/* As TDX enforces CR0.CD to 0, pass check_cr0_cd = false. */
> > > +	return __vmx_get_mt_mask(vcpu, gfn, is_mmio, false);
> > > +}
> > 
> > 
> > Do you know whether there's any use case of non-coherent device
> > assignment to a TDX guest?
> > 
> > IMHO, we should just disallow TDX guests from supporting non-coherent
> > device assignment, so that we can just return WB for both private and
> > shared.
> > 
> > If we support non-coherent device assignment, then if the guest sets
> > private memory to a non-WB memory type, it believes the memory type is
> > non-WB, but in fact TDX always maps private memory as WB.
> > 
> > Will this be a problem, i.e. if an assigned device can DMA to private
> > memory directly in the future?
> 
> MTRR is a legacy feature and PAT replaced it.  We can rely on the guest to use PAT.
> Here is the new patch for MTRR.
> 
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -229,6 +229,14 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
>  	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
>  }
>  
> +static u8 vt_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> +{
> +	if (is_td_vcpu(vcpu))
> +		return tdx_get_mt_mask(vcpu, gfn, is_mmio);
> +
> +	return vmx_get_mt_mask(vcpu, gfn, is_mmio);
> +}
> +
>  static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
>  {
>  	if (!is_td(kvm))
> @@ -349,7 +357,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>  
>  	.set_tss_addr = vmx_set_tss_addr,
>  	.set_identity_map_addr = vmx_set_identity_map_addr,
> -	.get_mt_mask = vmx_get_mt_mask,
> +	.get_mt_mask = vt_get_mt_mask,
>  
>  	.get_exit_info = vmx_get_exit_info,
>  
> diff -u b/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> --- b/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -347,6 +347,25 @@
>  	return 0;
>  }
>  
> +u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> +{
> +	/* TDX private GPA is always WB. */
> +	if (!(gfn & kvm_gfn_shared_mask(vcpu->kvm))) {

Are you still passing a "raw" GFN?  Could you explain why you chose this way?

> +		/* MMIO is only for shared GPA. */
> +		WARN_ON_ONCE(is_mmio);
> +		return  MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;

I guess it's better to include the VMX_EPT_IPAT_BIT.

> +	}
> +
> +	if (is_mmio)
> +		return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
> +
> +	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
> +		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
> +
> +	/* TDX enforces CR0.CD = 0 and KVM MTRR emulation enforces writeback. */
> +	return MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
> +}
> +
Re: [PATCH v13 057/113] KVM: TDX: MTRR: implement get_mt_mask() for TDX
Posted by Isaku Yamahata 2 years, 10 months ago
On Mon, Mar 27, 2023 at 09:54:40AM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> > diff -u b/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > --- b/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -347,6 +347,25 @@
> >  	return 0;
> >  }
> >  
> > +u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> > +{
> > +	/* TDX private GPA is always WB. */
> > +	if (!(gfn & kvm_gfn_shared_mask(vcpu->kvm))) {
> 
> Are you still passing a "raw" GFN?  Could you explain why you chose this way?
> 
> > +		/* MMIO is only for shared GPA. */
> > +		WARN_ON_ONCE(is_mmio);
> > +		return  MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
> 
> I guess it's better to include the VMX_EPT_IPAT_BIT.

On second thought, there is no need to check it.  We can simply drop this check.

u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
	if (is_mmio)
		return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;

	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;

	/* TDX enforces CR0.CD = 0 and KVM MTRR emulation enforces writeback. */
	return MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
}

-- 
Isaku Yamahata <isaku.yamahata@gmail.com>
Re: [PATCH v13 057/113] KVM: TDX: MTRR: implement get_mt_mask() for TDX
Posted by Huang, Kai 2 years, 10 months ago
On Wed, 2023-03-29 at 18:15 -0700, Isaku Yamahata wrote:
> On Mon, Mar 27, 2023 at 09:54:40AM +0000,
> "Huang, Kai" <kai.huang@intel.com> wrote:
> 
> > > diff -u b/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > > --- b/arch/x86/kvm/vmx/tdx.c
> > > +++ b/arch/x86/kvm/vmx/tdx.c
> > > @@ -347,6 +347,25 @@
> > >  	return 0;
> > >  }
> > >  
> > > +u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> > > +{
> > > +	/* TDX private GPA is always WB. */
> > > +	if (!(gfn & kvm_gfn_shared_mask(vcpu->kvm))) {
> > 
> > Are you still passing a "raw" GFN?  Could you explain why you chose this way?
> > 
> > > +		/* MMIO is only for shared GPA. */
> > > +		WARN_ON_ONCE(is_mmio);
> > > +		return  MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
> > 
> > I guess it's better to include the VMX_EPT_IPAT_BIT.
> 
> On second thought, there is no need to check it.  We can simply drop this check.
> 
> u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> {
> 	if (is_mmio)
> 		return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
> 
> 	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
> 		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
> 
> 	/* TDX enforces CR0.CD = 0 and KVM MTRR emulation enforces writeback. */
> 	return MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
> }
> 
> 

I think for private pages, _theoretically_, you should include VMX_EPT_IPAT_BIT
(because the TDX module spec says the "access semantics for private is WB", but
not "private is _mapped_ as WB").  But in practice this doesn't matter because
the mirrored EPT is never used by hardware.

While for shared pages, when the guest has non-coherent DMA, IIUC your
intention is that you still want to honor the guest's PAT, so you omitted
the VMX_EPT_IPAT_BIT.
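
I.e., for the private case that would be a one-line change along these
lines (sketch of the suggestion, not the posted code):

	/* Force WB and ignore guest PAT for private GPAs. */
	if (!(gfn & kvm_gfn_shared_mask(vcpu->kvm)))
		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;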