From: Isaku Yamahata <isaku.yamahata@intel.com>
Extract out __kvm_mmu_do_page_fault() from kvm_mmu_do_page_fault(). The
inner function is to initialize struct kvm_page_fault and to call the fault
handler, and the outer function handles updating stats and converting
return code. KVM_MAP_MEMORY will call the KVM page fault handler.
This patch makes the emulation_type always set irrelevant to the return
code. kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(),
and references the value only when PF_RET_EMULATE is returned. Therefore,
this adjustment doesn't affect functionality.
No functional change intended.
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
v2:
- Newly introduced. (Sean)
---
arch/x86/kvm/mmu/mmu_internal.h | 32 +++++++++++++++++++++-----------
1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index e68a60974cf4..9baae6c223ee 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -287,8 +287,8 @@ static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
fault->is_private);
}
-static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- u64 err, bool prefetch, int *emulation_type)
+static inline int __kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ u64 err, bool prefetch, int *emulation_type)
{
struct kvm_page_fault fault = {
.addr = cr2_or_gpa,
@@ -318,14 +318,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn);
}
- /*
- * Async #PF "faults", a.k.a. prefetch faults, are not faults from the
- * guest perspective and have already been counted at the time of the
- * original fault.
- */
- if (!prefetch)
- vcpu->stat.pf_taken++;
-
if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
r = kvm_tdp_page_fault(vcpu, &fault);
else
@@ -333,12 +325,30 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
if (r == RET_PF_EMULATE && fault.is_private) {
kvm_mmu_prepare_memory_fault_exit(vcpu, &fault);
- return -EFAULT;
+ r = -EFAULT;
}
if (fault.write_fault_to_shadow_pgtable && emulation_type)
*emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
+ return r;
+}
+
+static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ u64 err, bool prefetch, int *emulation_type)
+{
+ int r;
+
+ /*
+ * Async #PF "faults", a.k.a. prefetch faults, are not faults from the
+ * guest perspective and have already been counted at the time of the
+ * original fault.
+ */
+ if (!prefetch)
+ vcpu->stat.pf_taken++;
+
+ r = __kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, err, prefetch, emulation_type);
+
/*
* Similar to above, prefetch faults aren't truly spurious, and the
* async #PF path doesn't do emulation. Do count faults that are fixed
--
2.43.2
On Wed, 2024-04-10 at 15:07 -0700, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Extract out __kvm_mmu_do_page_fault() from kvm_mmu_do_page_fault(). The
> inner function is to initialize struct kvm_page_fault and to call the fault
> handler, and the outer function handles updating stats and converting
> return code. KVM_MAP_MEMORY will call the KVM page fault handler.
>
> This patch makes the emulation_type always set irrelevant to the return
a comma would help parse this better ^
> code.
> kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(),
Not technically correct, there are other callers that pass NULL for
emulation_type.
> and references the value only when PF_RET_EMULATE is returned. Therefore,
> this adjustment doesn't affect functionality.
Is there a problem with dropping the argument then?
>
> No functional change intended.
Can we not use the "intended"? It sounds like hedging for excuses.
>
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> v2:
> - Newly introduced. (Sean)
> ---
> arch/x86/kvm/mmu/mmu_internal.h | 32 +++++++++++++++++++++-----------
> 1 file changed, 21 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index e68a60974cf4..9baae6c223ee 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -287,8 +287,8 @@ static inline void
> kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
> fault->is_private);
> }
>
> -static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t
> cr2_or_gpa,
> - u64 err, bool prefetch, int
> *emulation_type)
> +static inline int __kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t
> cr2_or_gpa,
> + u64 err, bool prefetch, int
> *emulation_type)
> {
> struct kvm_page_fault fault = {
> .addr = cr2_or_gpa,
> @@ -318,14 +318,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu
> *vcpu, gpa_t cr2_or_gpa,
> fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn);
> }
>
> - /*
> - * Async #PF "faults", a.k.a. prefetch faults, are not faults from the
> - * guest perspective and have already been counted at the time of the
> - * original fault.
> - */
> - if (!prefetch)
> - vcpu->stat.pf_taken++;
> -
> if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
> r = kvm_tdp_page_fault(vcpu, &fault);
> else
> @@ -333,12 +325,30 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu
> *vcpu, gpa_t cr2_or_gpa,
>
> if (r == RET_PF_EMULATE && fault.is_private) {
> kvm_mmu_prepare_memory_fault_exit(vcpu, &fault);
> - return -EFAULT;
> + r = -EFAULT;
> }
>
> if (fault.write_fault_to_shadow_pgtable && emulation_type)
> *emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
>
> + return r;
> +}
> +
> +static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t
> cr2_or_gpa,
> + u64 err, bool prefetch, int
> *emulation_type)
> +{
> + int r;
> +
> + /*
> + * Async #PF "faults", a.k.a. prefetch faults, are not faults from the
> + * guest perspective and have already been counted at the time of the
> + * original fault.
> + */
> + if (!prefetch)
> + vcpu->stat.pf_taken++;
From the name, it makes sense to not count KVM_MAP_MEMORY as a pf_taken. But
kvm_arch_async_page_ready() increments it as well. Which makes it more like a
"faulted-in" count. I think the code in this patch is ok.
> +
> + r = __kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, err, prefetch,
> emulation_type);
> +
> /*
> * Similar to above, prefetch faults aren't truly spurious, and the
> * async #PF path doesn't do emulation. Do count faults that are
> fixed
On Tue, Apr 16, 2024 at 02:36:31PM +0000, "Edgecombe, Rick P" <rick.p.edgecombe@intel.com> wrote: > On Wed, 2024-04-10 at 15:07 -0700, isaku.yamahata@intel.com wrote: > > From: Isaku Yamahata <isaku.yamahata@intel.com> > > > > Extract out __kvm_mmu_do_page_fault() from kvm_mmu_do_page_fault(). The > > inner function is to initialize struct kvm_page_fault and to call the fault > > handler, and the outer function handles updating stats and converting > > return code. KVM_MAP_MEMORY will call the KVM page fault handler. > > > > This patch makes the emulation_type always set irrelevant to the return > a comma would help parse this better ^ > > code. > > > kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(), > > Not technically correct, there are other callers that pass NULL for > emulation_type. > > > and references the value only when PF_RET_EMULATE is returned. Therefore, > > this adjustment doesn't affect functionality. > > Is there a problem with dropping the argument then? > > > > > No functional change intended. > > Can we not use the "intended"? It sounds like hedging for excuses. Thanks for review. As Chao pointed out, this patch is unnecessary. I'll use kvm_mmu_do_page_fault() directly with updating vcpu->stat. https://lore.kernel.org/all/20240416234334.GA3039520@ls.amr.corp.intel.com/ -- Isaku Yamahata <isaku.yamahata@intel.com>
On Wed, Apr 17, 2024 at 1:52 AM Isaku Yamahata <isaku.yamahata@intel.com> wrote: > As Chao pointed out, this patch is unnecessary. I'll use > kvm_mmu_do_page_fault() directly with updating vcpu->stat. Actually I prefer to have this patch. pf_* stats do not make sense for pre-population, and updating them confuses things because pre-population (outside TDX) has the purpose of avoiding page faults. Paolo
>This patch makes the emulation_type always set irrelevant to the return
>code. kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(),
>and references the value only when PF_RET_EMULATE is returned. Therefore,
>this adjustment doesn't affect functionality.
This is benign. But what's the benefit of doing this?
>+static inline int __kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>+ u64 err, bool prefetch, int *emulation_type)
> {
> struct kvm_page_fault fault = {
> .addr = cr2_or_gpa,
>@@ -318,14 +318,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn);
> }
>
>- /*
>- * Async #PF "faults", a.k.a. prefetch faults, are not faults from the
>- * guest perspective and have already been counted at the time of the
>- * original fault.
>- */
>- if (!prefetch)
>- vcpu->stat.pf_taken++;
>-
> if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
> r = kvm_tdp_page_fault(vcpu, &fault);
> else
>@@ -333,12 +325,30 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>
> if (r == RET_PF_EMULATE && fault.is_private) {
> kvm_mmu_prepare_memory_fault_exit(vcpu, &fault);
>- return -EFAULT;
>+ r = -EFAULT;
> }
>
> if (fault.write_fault_to_shadow_pgtable && emulation_type)
> *emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
>
>+ return r;
>+}
>+
>+static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>+ u64 err, bool prefetch, int *emulation_type)
>+{
>+ int r;
>+
>+ /*
>+ * Async #PF "faults", a.k.a. prefetch faults, are not faults from the
>+ * guest perspective and have already been counted at the time of the
>+ * original fault.
>+ */
>+ if (!prefetch)
>+ vcpu->stat.pf_taken++;
>+
>+ r = __kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, err, prefetch, emulation_type);
bail out if r < 0?
>+
> /*
> * Similar to above, prefetch faults aren't truly spurious, and the
> * async #PF path doesn't do emulation. Do count faults that are fixed
>--
>2.43.2
>
>
On Tue, Apr 16, 2024 at 04:22:35PM +0800,
Chao Gao <chao.gao@intel.com> wrote:
>
> >This patch makes the emulation_type always set irrelevant to the return
> >code. kvm_mmu_page_fault() is the only caller of kvm_mmu_do_page_fault(),
> >and references the value only when PF_RET_EMULATE is returned. Therefore,
> >this adjustment doesn't affect functionality.
>
> This is benign. But what's the benefit of doing this?
To avoid increment vcpu->stat. Because originally this was VM ioctl, I wanted
to avoid touch vCPU stat. Now it's vCPU ioctl, it's fine to increment them.
Probably we can drop this patch and use kvm_mmu_do_page_fault().
>
> >+static inline int __kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> >+ u64 err, bool prefetch, int *emulation_type)
> > {
> > struct kvm_page_fault fault = {
> > .addr = cr2_or_gpa,
> >@@ -318,14 +318,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> > fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn);
> > }
> >
> >- /*
> >- * Async #PF "faults", a.k.a. prefetch faults, are not faults from the
> >- * guest perspective and have already been counted at the time of the
> >- * original fault.
> >- */
> >- if (!prefetch)
> >- vcpu->stat.pf_taken++;
> >-
> > if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
> > r = kvm_tdp_page_fault(vcpu, &fault);
> > else
> >@@ -333,12 +325,30 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> >
> > if (r == RET_PF_EMULATE && fault.is_private) {
> > kvm_mmu_prepare_memory_fault_exit(vcpu, &fault);
> >- return -EFAULT;
> >+ r = -EFAULT;
> > }
> >
> > if (fault.write_fault_to_shadow_pgtable && emulation_type)
> > *emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
> >
> >+ return r;
> >+}
> >+
> >+static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> >+ u64 err, bool prefetch, int *emulation_type)
> >+{
> >+ int r;
> >+
> >+ /*
> >+ * Async #PF "faults", a.k.a. prefetch faults, are not faults from the
> >+ * guest perspective and have already been counted at the time of the
> >+ * original fault.
> >+ */
> >+ if (!prefetch)
> >+ vcpu->stat.pf_taken++;
> >+
> >+ r = __kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, err, prefetch, emulation_type);
>
> bail out if r < 0?
The following if clauses checks RET_PF_xxx > 0.
--
Isaku Yamahata <isaku.yamahata@intel.com>
© 2016 - 2026 Red Hat, Inc.