[v2] KVM: guest memory: Misc enhacnement

[RFC PATCH v2 4/6] KVM: x86: Introduce fault type to indicate kvm page fault is private

Posted by isaku.yamahata@intel.com 2 years, 7 months ago

From: Isaku Yamahata <isaku.yamahata@intel.com>

Introduce kvm fault type to indicate how to handle kvm page fault.

It is unfortunate and inflexible for kvm_mmu_do_page_fault() to call
kvm_mem_is_private(), eventually looking up memory attributes.  Later
__kvm_faultin_pfn() looks up memory attributes again.  There is a race
condition that other threads can change memory attributes due to not
gaining the mmu lock.  SNP-SEV and TDX define theri way to indicate that
the page fault is private.

Add KVM fault type, add mmu_private_fault_mask to struct kvm_arch for SNP
to determine the fault is private, add gfn_shared_mask to struct kvm_arch
for TDX to determine the fault is private. KVM_FAULT_SHARED_ALWAYS is added
for the conventional guest to avoid over head to lookup memory attributes.

Suggested-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
Changes v1 -> v2:
- Introduced fault type and replaced is_private with fault_type.
- Add kvm_get_fault_type() to encapsulate the difference.
---
 arch/x86/include/asm/kvm_host.h |  6 ++++++
 arch/x86/kvm/mmu/mmu.c          | 26 ++++++++++++++++++++------
 arch/x86/kvm/mmu/mmu_internal.h | 33 +++++++++++++++++++++++++++++++--
 3 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8ae131dc645d..5afeefc7a516 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1445,6 +1445,12 @@ struct kvm_arch {
 	 */
 #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
 	struct kvm_mmu_memory_cache split_desc_cache;
+
+#ifdef CONFIG_KVM_PROTECTED_VM
+	/* To make the patch compile. */
+	u64 mmu_private_fault_mask;
+	gfn_t gfn_shared_mask;
+#endif
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b8ba7f11c3cb..feec75515f39 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3174,10 +3174,12 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
 
 static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
 				       const struct kvm_memory_slot *slot,
-				       gfn_t gfn, int max_level, bool is_private)
+				       gfn_t gfn, int max_level,
+				       enum kvm_fault_type fault_type)
 {
 	struct kvm_lpage_info *linfo;
 	int host_level;
+	bool is_private = fault_type == KVM_FAULT_PRIVATE;
 
 	max_level = min(max_level, max_huge_page_level);
 	for ( ; max_level > PG_LEVEL_4K; max_level--) {
@@ -3228,7 +3230,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	 */
 	fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
 						       fault->gfn, fault->max_level,
-						       fault->is_private);
+						       fault->fault_type);
 	if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
 		return;
 
@@ -4328,7 +4330,7 @@ static int kvm_do_memory_fault_exit(struct kvm_vcpu *vcpu,
 				    struct kvm_page_fault *fault)
 {
 	vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
-	if (fault->is_private)
+	if (fault->fault_type == KVM_FAULT_PRIVATE)
 		vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
 	else
 		vcpu->run->memory.flags = 0;
@@ -4386,10 +4388,22 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 			return RET_PF_EMULATE;
 	}
 
-	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
-		return kvm_do_memory_fault_exit(vcpu, fault);
+	if (fault->fault_type == KVM_FAULT_SHARED_ALWAYS) {
+		/*
+		 * The conventional case. Don't lookup memory attributes to
+		 * avoid overhead
+		 */
+		fault->fault_type = KVM_FAULT_SHARED;
+	} else if (fault->fault_type == KVM_FAULT_MEM_ATTR) {
+		fault->fault_type = kvm_mem_is_private(vcpu->kvm, fault->gfn) ?
+			KVM_FAULT_PRIVATE : KVM_FAULT_SHARED;
+	} else {
+		if ((fault->fault_type == KVM_FAULT_PRIVATE) !=
+		    kvm_mem_is_private(vcpu->kvm, fault->gfn))
+			return kvm_do_memory_fault_exit(vcpu, fault);
+	}
 
-	if (fault->is_private)
+	if (fault->fault_type == KVM_FAULT_PRIVATE)
 		return kvm_faultin_pfn_private(vcpu, fault);
 
 	async = false;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 7f9ec1e5b136..0ec0b927a391 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -188,6 +188,13 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
 	return READ_ONCE(nx_huge_pages) && !kvm->arch.disable_nx_huge_pages;
 }
 
+enum kvm_fault_type {
+	KVM_FAULT_MEM_ATTR,
+	KVM_FAULT_SHARED,
+	KVM_FAULT_SHARED_ALWAYS,
+	KVM_FAULT_PRIVATE,
+};
+
 struct kvm_page_fault {
 	/* arguments to kvm_mmu_do_page_fault.  */
 	const gpa_t addr;
@@ -203,9 +210,10 @@ struct kvm_page_fault {
 
 	/* Derived from mmu and global state.  */
 	const bool is_tdp;
-	const bool is_private;
 	const bool nx_huge_page_workaround_enabled;
 
+	enum kvm_fault_type fault_type;
+
 	/*
 	 * Whether a >4KB mapping can be created or is forbidden due to NX
 	 * hugepages.
@@ -282,6 +290,27 @@ enum {
 	RET_PF_SPURIOUS,
 };
 
+static inline enum kvm_fault_type kvm_get_fault_type(struct kvm *kvm,
+						     gpa_t gpa, u64 err)
+{
+
+#ifdef CONFIG_KVM_PROTECTED_VM
+	/* SEV-SNP handling */
+	if (kvm->arch.mmu_private_fault_mask)
+		return (err & kvm->arch.mmu_private_fault_mask) ?
+			KVM_FAULT_PRIVATE : KVM_FAULT_SHARED;
+
+	/* TDX handling */
+	if (kvm->arch.gfn_shared_mask)
+		return (gpa_to_gfn(gpa) & kvm->arch.gfn_shared_mask) ?
+			KVM_FAULT_SHARED : KVM_FAULT_PRIVATE;
+#endif
+	if (kvm->arch.vm_type == KVM_X86_PROTECTED_VM)
+		return KVM_FAULT_MEM_ATTR;
+	/* Don't query memory attributes. */
+	return KVM_FAULT_SHARED_ALWAYS;
+}
+
 static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 					u64 err, bool prefetch, int *emulation_type)
 {
@@ -301,7 +330,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 		.max_level = KVM_MAX_HUGEPAGE_LEVEL,
 		.req_level = PG_LEVEL_4K,
 		.goal_level = PG_LEVEL_4K,
-		.is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
+		.fault_type = kvm_get_fault_type(vcpu->kvm, cr2_or_gpa, err),
 	};
 	int r;
 
-- 
2.25.1

Re: [RFC PATCH v2 4/6] KVM: x86: Introduce fault type to indicate kvm page fault is private

Posted by Sean Christopherson 2 years, 7 months ago

On Thu, Jun 22, 2023, isaku.yamahata@intel.com wrote:
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 7f9ec1e5b136..0ec0b927a391 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -188,6 +188,13 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
>  	return READ_ONCE(nx_huge_pages) && !kvm->arch.disable_nx_huge_pages;
>  }
>  
> +enum kvm_fault_type {
> +	KVM_FAULT_MEM_ATTR,
> +	KVM_FAULT_SHARED,
> +	KVM_FAULT_SHARED_ALWAYS,
> +	KVM_FAULT_PRIVATE,

This is silly.  Just use AMD's error code bit, i.e. PFERR_GUEST_ENC_MASK as per
the SNP series.

  Bit 34 (ENC): Set to 1 if the guest’s effective C-bit was 1, 0 otherwise.

Just because Intel's ucode is too crusty to support error codes larger than 16
bits doesn't mean KVM can't utilize the bits :-)  KVM already translates to AMD's
error codes for other things, e.g.

	error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
	       PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;

For TDX, handle_ept_violation() can do something like:

	if (is_tdx(vcpu->kvm))
		error_code |= (gpa & shared) ? 0 : PFERR_GUEST_ENC_MASK;
	else if (kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(gpa)))
		error_code |= PFERR_GUEST_ENC_MASK;

And that's not even taking into account that TDX might have a separate entry point,
i.e. the "is_tdx()" check can probably be avoided.

As for optimizing kvm_mem_is_private() to avoid unnecessary xarray lookups, that
can and should be done separately, e.g.

  static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
  {
	return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
	       kvm_guest_has_private_mem(kvm) &&
	       kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
  }

where x86's implementation of kvm_guest_has_private_mem() can be

  #define kvm_guest_has_private_mem(kvm) (!!(kvm)->vm_type)

Re: [RFC PATCH v2 4/6] KVM: x86: Introduce fault type to indicate kvm page fault is private

Posted by Michael Roth 2 years, 7 months ago

On Fri, Jun 23, 2023 at 01:04:03PM -0700, Sean Christopherson wrote:
> On Thu, Jun 22, 2023, isaku.yamahata@intel.com wrote:
> > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > index 7f9ec1e5b136..0ec0b927a391 100644
> > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > @@ -188,6 +188,13 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
> >  	return READ_ONCE(nx_huge_pages) && !kvm->arch.disable_nx_huge_pages;
> >  }
> >  
> > +enum kvm_fault_type {
> > +	KVM_FAULT_MEM_ATTR,
> > +	KVM_FAULT_SHARED,
> > +	KVM_FAULT_SHARED_ALWAYS,
> > +	KVM_FAULT_PRIVATE,
> 
> This is silly.  Just use AMD's error code bit, i.e. PFERR_GUEST_ENC_MASK as per
> the SNP series.
> 
>   Bit 34 (ENC): Set to 1 if the guest’s effective C-bit was 1, 0 otherwise.
> 
> Just because Intel's ucode is too crusty to support error codes larger than 16
> bits doesn't mean KVM can't utilize the bits :-)  KVM already translates to AMD's
> error codes for other things, e.g.
> 
> 	error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
> 	       PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
> 
> For TDX, handle_ept_violation() can do something like:
> 
> 	if (is_tdx(vcpu->kvm))
> 		error_code |= (gpa & shared) ? 0 : PFERR_GUEST_ENC_MASK;
> 	else if (kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(gpa)))
> 		error_code |= PFERR_GUEST_ENC_MASK;

Maybe this is what you're getting at below, but seems awkward to have this
being handling in TDX code since that would suggest that SVM module would
also need to duplicate that logic and synthesize this PFERR_GUEST_ENC_MASK
bit for non-SNP VMs (e.g. gmem self-tests).

So maybe SNP/TDX can rely on passing this via error_code, and then some
common code, like kvm_mem_is_private(), can handle this for non-TDX/SNP
guest types. But the current gmem series does this via a new .is_private
in kvm_page_fault:

  .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),

This seems at odds with the idea of storing this 'fault->is_private'
logic into the error_code. Isaku and I were discussing[1] that we
should do one or the other:

  a) store everything in error_code
  b) use the .is_private field that seems to have been introduced to
     track exactly this information

So I think this series is attempting to do b). If you're suggesting we
should do a), then what purpose is fault->is_private field meant to
serve? Are you planning to get rid of it? Otherwise it seems redundant.

But I think approach b) is useful for another reason:

> 
> And that's not even taking into account that TDX might have a separate entry point,
> i.e. the "is_tdx()" check can probably be avoided.
> 
> As for optimizing kvm_mem_is_private() to avoid unnecessary xarray lookups, that
> can and should be done separately, e.g.
> 
>   static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
>   {
> 	return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
> 	       kvm_guest_has_private_mem(kvm) &&
> 	       kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
>   }
> 
> where x86's implementation of kvm_guest_has_private_mem() can be
> 
>   #define kvm_guest_has_private_mem(kvm) (!!(kvm)->vm_type)

It's just about not checking xarray for non-protected case. The
optimization here is that neither TDX nor SNP care about the xarray as
far as determining whether the *fault* was a private or not. We only
care later, in part of the KVM MMU code that determines whether the
fault type is at odds with the xarray state and whether to generate a
KVM_EXIT_MEMORY_FAULT as a result.

In that code, both TDX/SNP, as well as gmem self-tests, will all end up
calling kvm_mem_is_private().

In the gmem self-test case, in current gmem base series, and I think with
what you've proposed here, we'd check the xarray via kvm_mem_is_privae(),
both in kvm_mmu_do_page_fault(), as well as later kvm_faultin_pfn() where
KVM_EXIT_MEMORY_FAULT case is handled. That seems off because:

  1) Checking in kvm_mem_is_private() via kvm_mmu_do_page_fault() means
     that we will grab the value prior to when the KVM MMU records the
     mmu_invalidate_seq, which means there's a window between
     kvm_mmu_do_page_fault() and kvm_faultin_pfn() where an updated
     xarray won't be noticed, and the #NPF retry logic will not get
     triggered.

  2) For gmem self-test, kvm_mem_is_private() is the authority on
     whether the fault is private or not. There's no need to distinguish
     between what was set via KVM_SET_MEMORY_ATTRIBUTES, vs. what was
     indicated via fault flags/GPA like TDX/SNP do.

So it makes sense, in the gmem case (and TDX/SNP), to defer the
kvm_mem_is_private() till later in kvm_faultin_pfn(). It avoid a
duplicate lookup, and a race. But .is_private only conveys
encrypted/unencrypted fault in TDX/SNP case, it doesn't have a way to
cleanly convey this case "just use whatever kvm_mem_is_private() reports
later, because it will always be what the VMM set, and it's too early
to check kvm_mem_is_private() right now".

So that's where this enum type came from. Although in the discussion I
linked to above I suggested just:

  enum kvm_fault_type {
    KVM_FAULT_VMM_DEFINED,
    KVM_FAULT_SHARED,
    KVM_FAULT_PRIVATE,

Which I think would work just as well, since VMM_DEFINED is basically what
MEM_ATTR case handles in this patch, and if you also add the
kvm_guest_has_private_mem() check into kvm_mem_is_private() as you suggested,
it would naturally cover the SHARED_ALWAYS case as well.

I'm fine with other approaches, but think there are a couple good reasons
(#1 and #2 above) for what Isaku is proposing, and any alternative
approach should account for them as well.

[1] https://lore.kernel.org/kvm/20230612042559.375660-1-michael.roth@amd.com/T/#mdb24fe4998aa3ff3e568e8a0cba4d12b159087c7

-Mike

Re: [RFC PATCH v2 4/6] KVM: x86: Introduce fault type to indicate kvm page fault is private

Posted by Sean Christopherson 2 years, 7 months ago

On Sun, Jun 25, 2023, Michael Roth wrote:
> On Fri, Jun 23, 2023 at 01:04:03PM -0700, Sean Christopherson wrote:
> > On Thu, Jun 22, 2023, isaku.yamahata@intel.com wrote:
> > > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > > index 7f9ec1e5b136..0ec0b927a391 100644
> > > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > > @@ -188,6 +188,13 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
> > >  	return READ_ONCE(nx_huge_pages) && !kvm->arch.disable_nx_huge_pages;
> > >  }
> > >  
> > > +enum kvm_fault_type {
> > > +	KVM_FAULT_MEM_ATTR,
> > > +	KVM_FAULT_SHARED,
> > > +	KVM_FAULT_SHARED_ALWAYS,
> > > +	KVM_FAULT_PRIVATE,
> > 
> > This is silly.  Just use AMD's error code bit, i.e. PFERR_GUEST_ENC_MASK as per
> > the SNP series.
> > 
> >   Bit 34 (ENC): Set to 1 if the guest’s effective C-bit was 1, 0 otherwise.
> > 
> > Just because Intel's ucode is too crusty to support error codes larger than 16
> > bits doesn't mean KVM can't utilize the bits :-)  KVM already translates to AMD's
> > error codes for other things, e.g.
> > 
> > 	error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
> > 	       PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
> > 
> > For TDX, handle_ept_violation() can do something like:
> > 
> > 	if (is_tdx(vcpu->kvm))
> > 		error_code |= (gpa & shared) ? 0 : PFERR_GUEST_ENC_MASK;
> > 	else if (kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(gpa)))
> > 		error_code |= PFERR_GUEST_ENC_MASK;
> 
> Maybe this is what you're getting at below, but seems awkward to have this
> being handling in TDX code since that would suggest that SVM module would
> also need to duplicate that logic and synthesize this PFERR_GUEST_ENC_MASK
> bit for non-SNP VMs (e.g. gmem self-tests).

Ah, right, forgot about that angle.  The non-TDX synthesizing can be done in
kvm_mmu_page_fault(), e.g.

	if (vcpu->kvm->vm_type == KVM_X86_PROTECTED_VM &&
	    kvm_mem_is_private(...))
		error_code |= PFERR_GUEST_ENC_MASK;

> So maybe SNP/TDX can rely on passing this via error_code, and then some
> common code, like kvm_mem_is_private(), can handle this for non-TDX/SNP
> guest types. But the current gmem series does this via a new .is_private
> in kvm_page_fault:
> 
>   .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
> 
> This seems at odds with the idea of storing this 'fault->is_private'
> logic into the error_code. Isaku and I were discussing[1] that we
> should do one or the other:
> 
>   a) store everything in error_code
>   b) use the .is_private field that seems to have been introduced to
>      track exactly this information
> 
> So I think this series is attempting to do b). If you're suggesting we
> should do a), then what purpose is fault->is_private field meant to
> serve? Are you planning to get rid of it? Otherwise it seems redundant.
> 
> But I think approach b) is useful for another reason:

"is_private" would serve the same purpose as all the other bits that are derived
from the error code, e.g. improve readability, reduce line lengths, etc.  Though
looking at the name, just "private" is probably the right name.

	/* Derived from error_code.  */
	const bool exec;
	const bool write;
	const bool present;
	const bool rsvd;
	const bool user;

> > And that's not even taking into account that TDX might have a separate entry point,
> > i.e. the "is_tdx()" check can probably be avoided.
> > 
> > As for optimizing kvm_mem_is_private() to avoid unnecessary xarray lookups, that
> > can and should be done separately, e.g.
> > 
> >   static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> >   {
> > 	return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
> > 	       kvm_guest_has_private_mem(kvm) &&
> > 	       kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
> >   }
> > 
> > where x86's implementation of kvm_guest_has_private_mem() can be
> > 
> >   #define kvm_guest_has_private_mem(kvm) (!!(kvm)->vm_type)
> 
> It's just about not checking xarray for non-protected case. The
> optimization here is that neither TDX nor SNP care about the xarray as
> far as determining whether the *fault* was a private or not.

Yes, and what I am suggesting doesn't use kvm_mem_is_private() to synthesize that
flag for TDX (or SNP).

> We only care later, in part of the KVM MMU code that determines whether the
> fault type is at odds with the xarray state and whether to generate a
> KVM_EXIT_MEMORY_FAULT as a result.
> 
> In that code, both TDX/SNP, as well as gmem self-tests, will all end up
> calling kvm_mem_is_private().
> 
> In the gmem self-test case, in current gmem base series, and I think with
> what you've proposed here, we'd check the xarray via kvm_mem_is_privae(),
> both in kvm_mmu_do_page_fault(), as well as later kvm_faultin_pfn() where
> KVM_EXIT_MEMORY_FAULT case is handled. That seems off because:
> 
>   1) Checking in kvm_mem_is_private() via kvm_mmu_do_page_fault() means
>      that we will grab the value prior to when the KVM MMU records the
>      mmu_invalidate_seq, which means there's a window between
>      kvm_mmu_do_page_fault() and kvm_faultin_pfn() where an updated
>      xarray won't be noticed, and the #NPF retry logic will not get
>      triggered.

That's ok-ish, for some definitions of ok.  There's no fatal bug, but userspace
will see a spurious, arguably nonsensical exit.  If the race occurs before mmu_seq
is snapshot, this code will detect the change and exit to userspace.

	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
		return kvm_do_memory_fault_exit(vcpu, fault);

>   2) For gmem self-test, kvm_mem_is_private() is the authority on
>      whether the fault is private or not. There's no need to distinguish
>      between what was set via KVM_SET_MEMORY_ATTRIBUTES, vs. what was
>      indicated via fault flags/GPA like TDX/SNP do.
> 
> So it makes sense, in the gmem case (and TDX/SNP), to defer the
> kvm_mem_is_private() till later in kvm_faultin_pfn(). It avoid a
> duplicate lookup, and a race. But .is_private only conveys
> encrypted/unencrypted fault in TDX/SNP case, it doesn't have a way to
> cleanly convey this case "just use whatever kvm_mem_is_private() reports
> later, because it will always be what the VMM set, and it's too early
> to check kvm_mem_is_private() right now".

Yeah, the duplicate lookup is unfortunate :-/  But I'd really like to be able to
make kvm_page_fault.private const, just like all the other booleans that are
derived from the error code.  My concern with making it *not* const is that
there will be a discrepancy between "private" and "error_code", and we'll have
to be very careful to never touch "private" before kvm_faultin_pfn().

And I don't want to special case "VMM defined", because the primary reason the
"VMM defined" case exists at this time is to allow testing KVM's implementation
without TDX or SNP.  E.g. I don't want to end up with code in fast_page_fault()
or so that does X for KVM_FAULT_VMM_DEFINED, but Y for KVM_FAULT_PRIVATE.

So I'm leaning toward the above be

	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
		if (vcpu->kvm->vm_type == KVM_X86_PROTECTED_VM)
			return RET_PF_RETRY;
		else
			return kvm_do_memory_fault_exit(vcpu, fault);
	}

even though software-protected VMs would perform a technically-unnecessary
attributes lookup.  *If* software-protected VMs ever get to the point where folks
care about the performance overhead of the extra lookup, then I'm all for
revisiting the implementation, but that is a ginormous "if" at this point.  Though
even then I'd still prefer to figure out a way to make the flag const, but that's
a future problem.

> So that's where this enum type came from. Although in the discussion I
> linked to above I suggested just:
> 
>   enum kvm_fault_type {
>     KVM_FAULT_VMM_DEFINED,
>     KVM_FAULT_SHARED,
>     KVM_FAULT_PRIVATE,
> 
> Which I think would work just as well,

I want to use vm_type for tracking "VMM_DEFINED".  KVM's ABI easily allows for 64
VM types, I see no reason to reuse KVM_X86_PROTECTED_VM for TDX and/or SNP, though
the type probably should be KVM_X86_SW_PROTECTED_VM.  With that out of the way,
there's no need for an enum to track shared vs. private.

Re: [RFC PATCH v2 4/6] KVM: x86: Introduce fault type to indicate kvm page fault is private

Posted by Michael Roth 2 years, 7 months ago

On Mon, Jun 26, 2023 at 11:21:45AM -0700, Sean Christopherson wrote:
> On Sun, Jun 25, 2023, Michael Roth wrote:
> > On Fri, Jun 23, 2023 at 01:04:03PM -0700, Sean Christopherson wrote:
> > > On Thu, Jun 22, 2023, isaku.yamahata@intel.com wrote:
> > > > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > > > index 7f9ec1e5b136..0ec0b927a391 100644
> > > > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > > > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > > > @@ -188,6 +188,13 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
> > > >  	return READ_ONCE(nx_huge_pages) && !kvm->arch.disable_nx_huge_pages;
> > > >  }
> > > >  
> > > > +enum kvm_fault_type {
> > > > +	KVM_FAULT_MEM_ATTR,
> > > > +	KVM_FAULT_SHARED,
> > > > +	KVM_FAULT_SHARED_ALWAYS,
> > > > +	KVM_FAULT_PRIVATE,
> > > 
> > > This is silly.  Just use AMD's error code bit, i.e. PFERR_GUEST_ENC_MASK as per
> > > the SNP series.
> > > 
> > >   Bit 34 (ENC): Set to 1 if the guest’s effective C-bit was 1, 0 otherwise.
> > > 
> > > Just because Intel's ucode is too crusty to support error codes larger than 16
> > > bits doesn't mean KVM can't utilize the bits :-)  KVM already translates to AMD's
> > > error codes for other things, e.g.
> > > 
> > > 	error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
> > > 	       PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
> > > 
> > > For TDX, handle_ept_violation() can do something like:
> > > 
> > > 	if (is_tdx(vcpu->kvm))
> > > 		error_code |= (gpa & shared) ? 0 : PFERR_GUEST_ENC_MASK;
> > > 	else if (kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(gpa)))
> > > 		error_code |= PFERR_GUEST_ENC_MASK;
> > 
> > Maybe this is what you're getting at below, but seems awkward to have this
> > being handling in TDX code since that would suggest that SVM module would
> > also need to duplicate that logic and synthesize this PFERR_GUEST_ENC_MASK
> > bit for non-SNP VMs (e.g. gmem self-tests).
> 
> Ah, right, forgot about that angle.  The non-TDX synthesizing can be done in
> kvm_mmu_page_fault(), e.g.
> 
> 	if (vcpu->kvm->vm_type == KVM_X86_PROTECTED_VM &&
> 	    kvm_mem_is_private(...))
> 		error_code |= PFERR_GUEST_ENC_MASK;
> 
> > So maybe SNP/TDX can rely on passing this via error_code, and then some
> > common code, like kvm_mem_is_private(), can handle this for non-TDX/SNP
> > guest types. But the current gmem series does this via a new .is_private
> > in kvm_page_fault:
> > 
> >   .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
> > 
> > This seems at odds with the idea of storing this 'fault->is_private'
> > logic into the error_code. Isaku and I were discussing[1] that we
> > should do one or the other:
> > 
> >   a) store everything in error_code
> >   b) use the .is_private field that seems to have been introduced to
> >      track exactly this information
> > 
> > So I think this series is attempting to do b). If you're suggesting we
> > should do a), then what purpose is fault->is_private field meant to
> > serve? Are you planning to get rid of it? Otherwise it seems redundant.
> > 
> > But I think approach b) is useful for another reason:
> 
> "is_private" would serve the same purpose as all the other bits that are derived
> from the error code, e.g. improve readability, reduce line lengths, etc.  Though
> looking at the name, just "private" is probably the right name.
> 
> 	/* Derived from error_code.  */
> 	const bool exec;
> 	const bool write;
> 	const bool present;
> 	const bool rsvd;
> 	const bool user;

If we go out of our way to pull bits out of error_code so they can be
accessed more consistently/sensibly, why introduce additional bits into
error_code in the first place? It just seems like an unecessary
intermediate step, and introduces error_code bits that TDX/selftests
don't actually ever need, which raises the specter of "what if hardware
starts using this synthetic bit for something else?"

Is it mainly to avoid introducing additional parameters to
kvm_mmu_page_fault() and instead piggy-backing off of error_code param?
Or does recording the values into error_code have a use outside of that?

> 
> > > And that's not even taking into account that TDX might have a separate entry point,
> > > i.e. the "is_tdx()" check can probably be avoided.
> > > 
> > > As for optimizing kvm_mem_is_private() to avoid unnecessary xarray lookups, that
> > > can and should be done separately, e.g.
> > > 
> > >   static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> > >   {
> > > 	return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
> > > 	       kvm_guest_has_private_mem(kvm) &&
> > > 	       kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
> > >   }
> > > 
> > > where x86's implementation of kvm_guest_has_private_mem() can be
> > > 
> > >   #define kvm_guest_has_private_mem(kvm) (!!(kvm)->vm_type)
> > 
> > It's just about not checking xarray for non-protected case. The
> > optimization here is that neither TDX nor SNP care about the xarray as
> > far as determining whether the *fault* was a private or not.
> 
> Yes, and what I am suggesting doesn't use kvm_mem_is_private() to synthesize that
> flag for TDX (or SNP).
> 
> > We only care later, in part of the KVM MMU code that determines whether the
> > fault type is at odds with the xarray state and whether to generate a
> > KVM_EXIT_MEMORY_FAULT as a result.
> > 
> > In that code, both TDX/SNP, as well as gmem self-tests, will all end up
> > calling kvm_mem_is_private().
> > 
> > In the gmem self-test case, in current gmem base series, and I think with
> > what you've proposed here, we'd check the xarray via kvm_mem_is_privae(),
> > both in kvm_mmu_do_page_fault(), as well as later kvm_faultin_pfn() where
> > KVM_EXIT_MEMORY_FAULT case is handled. That seems off because:
> > 
> >   1) Checking in kvm_mem_is_private() via kvm_mmu_do_page_fault() means
> >      that we will grab the value prior to when the KVM MMU records the
> >      mmu_invalidate_seq, which means there's a window between
> >      kvm_mmu_do_page_fault() and kvm_faultin_pfn() where an updated
> >      xarray won't be noticed, and the #NPF retry logic will not get
> >      triggered.
> 
> That's ok-ish, for some definitions of ok.  There's no fatal bug, but userspace
> will see a spurious, arguably nonsensical exit.  If the race occurs before mmu_seq
> is snapshot, this code will detect the change and exit to userspace.
> 
> 	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
> 		return kvm_do_memory_fault_exit(vcpu, fault);

Yah, that's not so bad, but it does make it more tricky to write tighter
tests since you always have to allow/handle spurious KVM_EXIT_MEMORY
faults instead of being able to individually flag those non-sensical
cases. Not a huge deal I guess.

> 
> >   2) For gmem self-test, kvm_mem_is_private() is the authority on
> >      whether the fault is private or not. There's no need to distinguish
> >      between what was set via KVM_SET_MEMORY_ATTRIBUTES, vs. what was
> >      indicated via fault flags/GPA like TDX/SNP do.
> > 
> > So it makes sense, in the gmem case (and TDX/SNP), to defer the
> > kvm_mem_is_private() till later in kvm_faultin_pfn(). It avoid a
> > duplicate lookup, and a race. But .is_private only conveys
> > encrypted/unencrypted fault in TDX/SNP case, it doesn't have a way to
> > cleanly convey this case "just use whatever kvm_mem_is_private() reports
> > later, because it will always be what the VMM set, and it's too early
> > to check kvm_mem_is_private() right now".
> 
> Yeah, the duplicate lookup is unfortunate :-/  But I'd really like to be able to
> make kvm_page_fault.private const, just like all the other booleans that are
> derived from the error code.  My concern with making it *not* const is that
> there will be a discrepancy between "private" and "error_code", and we'll have
> to be very careful to never touch "private" before kvm_faultin_pfn().

It can actually be const using the KVM_FAULT_{VMM_DEFINED,SHARED,PRIVATE} 
abstraction. fault->is_private doesn't really need to be overwritten for
VMM_DEFINED case later, it can just be treated as "use whatever
kvm_mem_is_private() reports". But I guess that's what you mean by
"special casing" below.

> 
> And I don't want to special case "VMM defined", because the primary reason the
> "VMM defined" case exists at this time is to allow testing KVM's implementation
> without TDX or SNP.  E.g. I don't want to end up with code in fast_page_fault()

Are you suggesting VMM_DEFINED would eventually go away once SNP/TDX
have bigger uptake, or maybe in favor of tests built around some new VM type
(e.g. software-protected VMs) that use guest hypercalls to set
guest-expected memory state rather than always relying on what the VMM sets
up?

I guess in that case VMM_DEFINED handling could just be dropped at
that point, and KVM_FAULT_{SHARED,PRIVATE} would still be relevant (or they
could get switched back to bool at that point), and software-protected VMs
would set that value based on whatever state tracks the hypercalls.

But if that's the end-game maybe it's a good reason for keeping
fault->is_private bool and avoiding enums. But still don't really see the
worth in also setting the bit in error_code.

> without TDX or SNP.  E.g. I don't want to end up with code in fast_page_fault()
> or so that does X for KVM_FAULT_VMM_DEFINED, but Y for KVM_FAULT_PRIVATE.

Hadn't really considered fast_page_fault() for SNP... it seems like
for explicit page-state changes, the vCPU issuing the conversions
would just block until the GHCB request it issued was completed. So by
the time it accesses the GPA, KVM_SET_MEMORY_ATTRIBUTES would have
already zapped the old entry, so the fast path would get bypassed at
that point.

For implicit page-state changes, I guess there's a risk you can
get stuck looping on fast_page_fault() since it's up to KVM MMU
to generate the KVM_EXIT_MEMORY_FAULT so KVM_SET_MEMORY_ATTRIBUTES
gets called eventually. Sort of surprised I haven't encountered
that case though... but anyway...

If you rely on similar checks to what slow path does:

 	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
 		return kvm_do_memory_fault_exit(vcpu, fault);

kvm_mem_is_private() could be stale due to unnoticed invalidation,
but eventually it would reach steady-state and the
KVM_EXIT_MEMORY_FAULT would fire. Is that sort of what you have in
mind there?

For SNP it seems more efficient to check for RMP bit and then head
straight to the slow-path, but this isn't a hot path so probably
doesn't matter much.

> 
> So I'm leaning toward the above be
> 
> 	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
> 		if (vcpu->kvm->vm_type == KVM_X86_PROTECTED_VM)
> 			return RET_PF_RETRY;
> 		else
> 			return kvm_do_memory_fault_exit(vcpu, fault);
> 	}
> 
> even though software-protected VMs would perform a technically-unnecessary
> attributes lookup.  *If* software-protected VMs ever get to the point where folks
> care about the performance overhead of the extra lookup, then I'm all for
> revisiting the implementation, but that is a ginormous "if" at this point.  Though
> even then I'd still prefer to figure out a way to make the flag const, but that's
> a future problem.
> 
> > So that's where this enum type came from. Although in the discussion I
> > linked to above I suggested just:
> > 
> >   enum kvm_fault_type {
> >     KVM_FAULT_VMM_DEFINED,
> >     KVM_FAULT_SHARED,
> >     KVM_FAULT_PRIVATE,
> > 
> > Which I think would work just as well,
> 
> I want to use vm_type for tracking "VMM_DEFINED".  KVM's ABI easily allows for 64
> VM types, I see no reason to reuse KVM_X86_PROTECTED_VM for TDX and/or SNP, though
> the type probably should be KVM_X86_SW_PROTECTED_VM.  With that out of the way,
> there's no need for an enum to track shared vs. private.
> 

Introducing TDX/SNP vm types seems to buy us some flexibility so it
sounds useful.

Rather than synthesizing bits in error_code, maybe we could also use it
to help out there as well? (assuming parameter-passing was the main
use-case there)

  static bool kvm_fault_is_private(kvm, gpa, error_code)
  {
    if (kvm->vm_type == KVM_X86_TDX_VM)
      return gpa & TDX_SHARED_GPA_MASK;

    if (kvm->vm_type == KVM_X86_SNP_VM)
      return error_code & PFERR_GUEST_ENC_MASK;

    return kvm_mem_is_private(kvm, gpa);
  }

  static inline int kvm_mmu_do_page_fault(vcpu, gpa, err, ...)
  {
    struct kvm_page_fault = {
      ...
      .is_private = kvm_fault_is_private(kvm, gpa, err)
    };

    ...
  }

-Mike

Re: [RFC PATCH v2 4/6] KVM: x86: Introduce fault type to indicate kvm page fault is private

Posted by Sean Christopherson 2 years, 7 months ago

On Tue, Jun 27, 2023, Michael Roth wrote:
> On Mon, Jun 26, 2023 at 11:21:45AM -0700, Sean Christopherson wrote:
> > "is_private" would serve the same purpose as all the other bits that are derived
> > from the error code, e.g. improve readability, reduce line lengths, etc.  Though
> > looking at the name, just "private" is probably the right name.
> > 
> > 	/* Derived from error_code.  */
> > 	const bool exec;
> > 	const bool write;
> > 	const bool present;
> > 	const bool rsvd;
> > 	const bool user;
> 
> If we go out of our way to pull bits out of error_code so they can be
> accessed more consistently/sensibly, why introduce additional bits into
> error_code in the first place? It just seems like an unecessary
> intermediate step, and introduces error_code bits that TDX/selftests
> don't actually ever need, which raises the specter of "what if hardware
> starts using this synthetic bit for something else?"

It's not a KVM-defined bit though, it's an AMD-defined bit.  That's a very big
difference as AMD and Intel both go to out of their way to not step on each other's
toes.

And *architecturally*, VMX doesn't support #PF error codes larger than 16 bits,
not to mention that all of this needs to be dependent on TDP.  And for EPT
violations, the error code is *fully* synthetic anyways because the error code
passed into kvm_mmu_page_fault is a translation of vmcs.EXIT_QUALIFICATION into
the "legacy" error code format.

So there's really no danger here, whereas a truly synthetic, KVM-defined bit like
PFERR_IMPLICIT_ACCESS *does* carry some risk because AMD in particular might use
bit 48 for a new hardware-defined flags.

> Is it mainly to avoid introducing additional parameters to
> kvm_mmu_page_fault() and instead piggy-backing off of error_code param?
> Or does recording the values into error_code have a use outside of that?

It provides canonical behavior within common KVM code.  Stashing individual flags
in const bools in kvm_page_fault is purely for ease-of-use, they are NOT the canonical
representation of state for the entirety of page fault handling.  E.g. in the unlikely
scenario that kvm_mmu_page_fault() needs to care about shared vs. private, there's
no kvm_page_fault structure to query.

> > > So it makes sense, in the gmem case (and TDX/SNP), to defer the
> > > kvm_mem_is_private() till later in kvm_faultin_pfn(). It avoid a
> > > duplicate lookup, and a race. But .is_private only conveys
> > > encrypted/unencrypted fault in TDX/SNP case, it doesn't have a way to
> > > cleanly convey this case "just use whatever kvm_mem_is_private() reports
> > > later, because it will always be what the VMM set, and it's too early
> > > to check kvm_mem_is_private() right now".
> > 
> > Yeah, the duplicate lookup is unfortunate :-/  But I'd really like to be able to
> > make kvm_page_fault.private const, just like all the other booleans that are
> > derived from the error code.  My concern with making it *not* const is that
> > there will be a discrepancy between "private" and "error_code", and we'll have
> > to be very careful to never touch "private" before kvm_faultin_pfn().
> 
> It can actually be const using the KVM_FAULT_{VMM_DEFINED,SHARED,PRIVATE} 
> abstraction. fault->is_private doesn't really need to be overwritten for
> VMM_DEFINED case later, it can just be treated as "use whatever
> kvm_mem_is_private() reports". But I guess that's what you mean by
> "special casing" below.

That's not constifying the state (private vs. shared), that's constifying behavior.
VMM_DEFINED is like Schrodinger's cat; the fault is neither private nor shared
until KVM actually looks at it.

> > And I don't want to special case "VMM defined", because the primary reason the
> > "VMM defined" case exists at this time is to allow testing KVM's implementation
> > without TDX or SNP.  E.g. I don't want to end up with code in fast_page_fault()
> 
> Are you suggesting VMM_DEFINED would eventually go away once SNP/TDX
> have bigger uptake,

I'm saying I don't want VMM_DEFINED, period.  :-)

> or maybe in favor of tests built around some new VM type (e.g.
> software-protected VMs) that use guest hypercalls to set guest-expected
> memory state rather than always relying on what the VMM sets up?
> 
> I guess in that case VMM_DEFINED handling could just be dropped at
> that point, and KVM_FAULT_{SHARED,PRIVATE} would still be relevant (or they
> could get switched back to bool at that point), and software-protected VMs
> would set that value based on whatever state tracks the hypercalls.
> 
> But if that's the end-game maybe it's a good reason for keeping
> fault->is_private bool and avoiding enums. But still don't really see the
> worth in also setting the bit in error_code.
> 
> > without TDX or SNP.  E.g. I don't want to end up with code in fast_page_fault()
> > or so that does X for KVM_FAULT_VMM_DEFINED, but Y for KVM_FAULT_PRIVATE.
> 
> Hadn't really considered fast_page_fault() for SNP... it seems like
> for explicit page-state changes, the vCPU issuing the conversions
> would just block until the GHCB request it issued was completed. So by
> the time it accesses the GPA, KVM_SET_MEMORY_ATTRIBUTES would have
> already zapped the old entry, so the fast path would get bypassed at
> that point.
> 
> For implicit page-state changes, I guess there's a risk you can
> get stuck looping on fast_page_fault() since it's up to KVM MMU
> to generate the KVM_EXIT_MEMORY_FAULT so KVM_SET_MEMORY_ATTRIBUTES
> gets called eventually. Sort of surprised I haven't encountered
> that case though... but anyway...
> 
> If you rely on similar checks to what slow path does:
> 
>  	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
>  		return kvm_do_memory_fault_exit(vcpu, fault);
> 
> kvm_mem_is_private() could be stale due to unnoticed invalidation,
> but eventually it would reach steady-state and the
> KVM_EXIT_MEMORY_FAULT would fire. Is that sort of what you have in
> mind there?
> 
> For SNP it seems more efficient to check for RMP bit and then head
> straight to the slow-path, but this isn't a hot path so probably
> doesn't matter much.

I'm not concerned about any specific path, i.e. don't read too much into my
arbitrary fast_page_fault() example.

What I am trying to point out is that I don't want to end up in a world where
a page fault is known to be private or shared for TDX/SNP, but not for SW-protected
VMs.  Because if that happens, then we'll end up in one of three situations:

  1. KVM can't act on private versus shared until after __kvm_faultin_pfn()
  2. KVM is buggy because it is consuming undefined state, i.e. querying if memory
     is private versus shared before looking inside the box
  3. KVM has different flows for TDX/SNP versus SW-protected VMs

> > So I'm leaning toward the above be
> > 
> > 	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
> > 		if (vcpu->kvm->vm_type == KVM_X86_PROTECTED_VM)
> > 			return RET_PF_RETRY;
> > 		else
> > 			return kvm_do_memory_fault_exit(vcpu, fault);
> > 	}
> > 
> > even though software-protected VMs would perform a technically-unnecessary
> > attributes lookup.  *If* software-protected VMs ever get to the point where folks
> > care about the performance overhead of the extra lookup, then I'm all for
> > revisiting the implementation, but that is a ginormous "if" at this point.  Though
> > even then I'd still prefer to figure out a way to make the flag const, but that's
> > a future problem.
> > 
> > > So that's where this enum type came from. Although in the discussion I
> > > linked to above I suggested just:
> > > 
> > >   enum kvm_fault_type {
> > >     KVM_FAULT_VMM_DEFINED,
> > >     KVM_FAULT_SHARED,
> > >     KVM_FAULT_PRIVATE,
> > > 
> > > Which I think would work just as well,
> > 
> > I want to use vm_type for tracking "VMM_DEFINED".  KVM's ABI easily allows for 64
> > VM types, I see no reason to reuse KVM_X86_PROTECTED_VM for TDX and/or SNP, though
> > the type probably should be KVM_X86_SW_PROTECTED_VM.  With that out of the way,
> > there's no need for an enum to track shared vs. private.
> > 
> 
> Introducing TDX/SNP vm types seems to buy us some flexibility so it
> sounds useful.
> 
> Rather than synthesizing bits in error_code, maybe we could also use it
> to help out there as well? (assuming parameter-passing was the main
> use-case there)
> 
>   static bool kvm_fault_is_private(kvm, gpa, error_code)
>   {
>     if (kvm->vm_type == KVM_X86_TDX_VM)
>       return gpa & TDX_SHARED_GPA_MASK;
> 
>     if (kvm->vm_type == KVM_X86_SNP_VM)
>       return error_code & PFERR_GUEST_ENC_MASK;
> 
>     return kvm_mem_is_private(kvm, gpa);
>   }

That's also an option, but (a) it's a bit kludgy as it requries three pieces of data,
(b) it's suboptimal for KVM_X86_SW_PROTECTED_VM if there are multiple lookups, and
(c) it will result in inconsistent behavior if there are multiple lookups.

E.g. these two things wouldn't be functionally equivalent, which is just asking
for bugs.

	if (!kvm_fault_is_private(...)
		A();

	B();

	if (kvm_fault_is_private(...)
		C();

versus

	bool private = kvm_fault_is_private();

	if (!private)
		A();

	B();

	if (private)
		C();

[RFC PATCH v2 1/6] KVM: selftests: Fix test_add_overlapping_private_memory_regions()
[RFC PATCH v2 2/6] KVM: selftests: Fix guest_memfd()
[RFC PATCH v2 3/6] KVM: x86/mmu: Pass round full 64-bit error code for the KVM page fault
[RFC PATCH v2 4/6] KVM: x86: Introduce fault type to indicate kvm page fault is private
[RFC PATCH v2 5/6] KVM: Add flags to struct kvm_gfn_range
[RFC PATCH v2 6/6] KVM: x86: Add is_vm_type_supported callback