From: Isaku Yamahata <isaku.yamahata@intel.com>
Introduce a helper function to call the KVM page fault handler. This
allows a new ioctl to invoke the fault handler to populate guest memory
mappings without seeing RET_PF_* enums or other KVM MMU internal
definitions.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/mmu.h     |  3 +++
 arch/x86/kvm/mmu/mmu.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 60f21bb4c27b..48870c5e08ec 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -183,6 +183,9 @@ static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
 	__kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
 }
 
+int kvm_mmu_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+		     u8 max_level, u8 *goal_level);
+
 /*
  * Check if a given access (described through the I/D, W/R and U/S bits of a
  * page fault error code pfec) causes a permission fault with the given PTE
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e4cc7f764980..7d5e80d17977 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4659,6 +4659,36 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	return direct_page_fault(vcpu, fault);
 }
 
+int kvm_mmu_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+		     u8 max_level, u8 *goal_level)
+{
+	struct kvm_page_fault fault = KVM_PAGE_FAULT_INIT(vcpu, gpa, error_code,
+							  false, max_level);
+	int r;
+
+	r = __kvm_mmu_do_page_fault(vcpu, &fault);
+
+	if (is_error_noslot_pfn(fault.pfn) || vcpu->kvm->vm_bugged)
+		return -EFAULT;
+
+	switch (r) {
+	case RET_PF_RETRY:
+		return -EAGAIN;
+
+	case RET_PF_FIXED:
+	case RET_PF_SPURIOUS:
+		*goal_level = fault.goal_level;
+		return 0;
+
+	case RET_PF_CONTINUE:
+	case RET_PF_EMULATE:
+	case RET_PF_INVALID:
+	default:
+		return -EIO;
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_map_page);
+
 static void nonpaging_init_context(struct kvm_mmu *context)
 {
 	context->page_fault = nonpaging_page_fault;
--
2.25.1
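
For context, a populate-style caller (e.g. the new ioctl mentioned in
the changelog) might drive the helper along these lines. This is a
hypothetical sketch: kvm_mmu_map_page() and its -EAGAIN return come
from the patch above, while populate_range(), the error_code choice,
and the loop structure are illustrative assumptions, not posted code.

static int populate_range(struct kvm_vcpu *vcpu, gpa_t gpa, u64 npages,
			  u32 error_code, u8 max_level)
{
	gpa_t end = gpa + (npages << PAGE_SHIFT);
	u8 goal_level = PG_LEVEL_4K;
	int r;

	while (gpa < end) {
		r = kvm_mmu_map_page(vcpu, gpa, error_code, max_level,
				     &goal_level);
		if (r == -EAGAIN)
			continue;	/* transient, e.g. mmu_notifier retry */
		if (r)
			return r;

		/* Advance by the size of the mapping actually installed. */
		gpa += KVM_HPAGE_SIZE(goal_level);
	}
	return 0;
}
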
On Fri, Mar 01, 2024, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Introduce a helper function to call kvm fault handler. This allows
> a new ioctl to invoke kvm fault handler to populate without seeing
> RET_PF_* enums or other KVM MMU internal definitions.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> arch/x86/kvm/mmu.h | 3 +++
> arch/x86/kvm/mmu/mmu.c | 30 ++++++++++++++++++++++++++++++
> 2 files changed, 33 insertions(+)
>
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 60f21bb4c27b..48870c5e08ec 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -183,6 +183,9 @@ static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
> __kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
> }
>
> +int kvm_mmu_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
> + u8 max_level, u8 *goal_level);
> +
> /*
> * Check if a given access (described through the I/D, W/R and U/S bits of a
> * page fault error code pfec) causes a permission fault with the given PTE
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e4cc7f764980..7d5e80d17977 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4659,6 +4659,36 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> return direct_page_fault(vcpu, fault);
> }
>
> +int kvm_mmu_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
> + u8 max_level, u8 *goal_level)
> +{
> + struct kvm_page_fault fault = KVM_PAGE_FAULT_INIT(vcpu, gpa, error_code,
> + false, max_level);
> + int r;
> +
> + r = __kvm_mmu_do_page_fault(vcpu, &fault);
> +
> + if (is_error_noslot_pfn(fault.pfn) || vcpu->kvm->vm_bugged)
> + return -EFAULT;
This clobbers a non-zero 'r'. And KVM returns -EIO if the VM is bugged/dead,
not -EFAULT. I also don't see why KVM needs to explicitly check
is_error_noslot_pfn(); that should be funneled to RET_PF_EMULATE.
> +
> + switch (r) {
> + case RET_PF_RETRY:
> + return -EAGAIN;
> +
> + case RET_PF_FIXED:
> + case RET_PF_SPURIOUS:
> + *goal_level = fault.goal_level;
> + return 0;
> +
> + case RET_PF_CONTINUE:
> + case RET_PF_EMULATE:
-EINVAL would be more appropriate for RET_PF_EMULATE.
> + case RET_PF_INVALID:
CONTINUE and INVALID should be WARN conditions.
> + default:
> + return -EIO;
> + }
> +}
> +EXPORT_SYMBOL_GPL(kvm_mmu_map_page);
Unnecessary export.
> +
> static void nonpaging_init_context(struct kvm_mmu *context)
> {
> context->page_fault = nonpaging_page_fault;
> --
> 2.25.1
>
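
Taken together, the fixes suggested above might land the helper in
roughly this shape. This is a sketch of one possible revision, not the
posted code or the final version: the is_error_noslot_pfn() check is
dropped, a bugged/dead VM returns -EIO, RET_PF_EMULATE returns -EINVAL,
CONTINUE/INVALID become WARN conditions, and the export is removed.

int kvm_mmu_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
		     u8 max_level, u8 *goal_level)
{
	struct kvm_page_fault fault = KVM_PAGE_FAULT_INIT(vcpu, gpa, error_code,
							  false, max_level);
	int r;

	r = __kvm_mmu_do_page_fault(vcpu, &fault);
	if (vcpu->kvm->vm_bugged)
		return -EIO;

	switch (r) {
	case RET_PF_RETRY:
		return -EAGAIN;

	case RET_PF_FIXED:
	case RET_PF_SPURIOUS:
		*goal_level = fault.goal_level;
		return 0;

	case RET_PF_EMULATE:
		return -EINVAL;

	case RET_PF_CONTINUE:
	case RET_PF_INVALID:
	default:
		/* These should be unreachable from this call path. */
		WARN_ON_ONCE(1);
		return -EIO;
	}
}
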
On Mon, Mar 11, 2024 at 10:29:01AM -0700,
Sean Christopherson <seanjc@google.com> wrote:
> On Fri, Mar 01, 2024, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> >
> > Introduce a helper function to call the KVM page fault handler. This
> > allows a new ioctl to invoke the fault handler to populate guest memory
> > mappings without seeing RET_PF_* enums or other KVM MMU internal
> > definitions.
> >
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> > arch/x86/kvm/mmu.h | 3 +++
> > arch/x86/kvm/mmu/mmu.c | 30 ++++++++++++++++++++++++++++++
> > 2 files changed, 33 insertions(+)
> >
> > diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> > index 60f21bb4c27b..48870c5e08ec 100644
> > --- a/arch/x86/kvm/mmu.h
> > +++ b/arch/x86/kvm/mmu.h
> > @@ -183,6 +183,9 @@ static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
> > __kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
> > }
> >
> > +int kvm_mmu_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
> > + u8 max_level, u8 *goal_level);
> > +
> > /*
> > * Check if a given access (described through the I/D, W/R and U/S bits of a
> > * page fault error code pfec) causes a permission fault with the given PTE
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index e4cc7f764980..7d5e80d17977 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -4659,6 +4659,36 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> > return direct_page_fault(vcpu, fault);
> > }
> >
> > +int kvm_mmu_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
> > + u8 max_level, u8 *goal_level)
> > +{
> > + struct kvm_page_fault fault = KVM_PAGE_FAULT_INIT(vcpu, gpa, error_code,
> > + false, max_level);
> > + int r;
> > +
> > + r = __kvm_mmu_do_page_fault(vcpu, &fault);
> > +
> > + if (is_error_noslot_pfn(fault.pfn) || vcpu->kvm->vm_bugged)
> > + return -EFAULT;
>
> This clobbers a non-zero 'r'. And KVM returns -EIO if the VM is bugged/dead,
> not -EFAULT. I also don't see why KVM needs to explicitly check
> is_error_noslot_pfn(); that should be funneled to RET_PF_EMULATE.
I'll drop this check.
> > +
> > + switch (r) {
> > + case RET_PF_RETRY:
> > + return -EAGAIN;
> > +
> > + case RET_PF_FIXED:
> > + case RET_PF_SPURIOUS:
> > + *goal_level = fault.goal_level;
> > + return 0;
> > +
> > + case RET_PF_CONTINUE:
> > + case RET_PF_EMULATE:
>
> -EINVAL would be more appropriate for RET_PF_EMULATE.
>
> > + case RET_PF_INVALID:
>
> CONTINUE and INVALID should be WARN conditions.
Will update them.
--
Isaku Yamahata <isaku.yamahata@intel.com>
On 2024-03-01 09:28 AM, isaku.yamahata@intel.com wrote:
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e4cc7f764980..7d5e80d17977 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4659,6 +4659,36 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> return direct_page_fault(vcpu, fault);
> }
>
> +int kvm_mmu_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
> + u8 max_level, u8 *goal_level)
> +{
> + struct kvm_page_fault fault = KVM_PAGE_FAULT_INIT(vcpu, gpa, error_code,
> + false, max_level);
> + int r;
> +
> + r = __kvm_mmu_do_page_fault(vcpu, &fault);
If TDP is disabled, __kvm_mmu_do_page_fault() will interpret @gpa as a
GVA, no? And if the vCPU is in guest mode, __kvm_mmu_do_page_fault()
will interpret @gpa as an nGPA, right?
On Wed, Mar 06, 2024 at 04:38:53PM -0800,
David Matlack <dmatlack@google.com> wrote:
> On 2024-03-01 09:28 AM, isaku.yamahata@intel.com wrote:
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index e4cc7f764980..7d5e80d17977 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -4659,6 +4659,36 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> > return direct_page_fault(vcpu, fault);
> > }
> >
> > +int kvm_mmu_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
> > + u8 max_level, u8 *goal_level)
> > +{
> > + struct kvm_page_fault fault = KVM_PAGE_FAULT_INIT(vcpu, gpa, error_code,
> > + false, max_level);
> > + int r;
> > +
> > + r = __kvm_mmu_do_page_fault(vcpu, &fault);
>
> If TDP is disabled, __kvm_mmu_do_page_fault() will interpret @gpa as a
> GVA, no? And if the vCPU is in guest mode, __kvm_mmu_do_page_fault()
> will interpret @gpa as an nGPA, right?
Just to close the discussion: as we discussed at [1], I'd like to
restrict the API to the TDP MMU only (with the next version). If the
vCPU is in guest mode or using the legacy MMU, it will get an error.
[1] https://lore.kernel.org/kvm/ZekQFdPlU7RDVt-B@google.com/
--
Isaku Yamahata <isaku.yamahata@intel.com>
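
The restriction described above might take the shape of a guard such as
the following. This is an illustrative sketch, not posted code:
tdp_mmu_enabled, root_role.direct, and is_guest_mode() are existing KVM
symbols, but the helper name and the exact combination of checks are
assumptions about the unposted next version.

static bool kvm_mmu_map_page_supported(struct kvm_vcpu *vcpu)
{
	/* Only the direct TDP MMU interprets @gpa as an L1 GPA. */
	return tdp_mmu_enabled && vcpu->arch.mmu->root_role.direct &&
	       !is_guest_mode(vcpu);
}

kvm_mmu_map_page() would then return an error such as -EOPNOTSUPP when
this check fails, instead of silently treating @gpa as a GVA or nGPA.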