From: Isaku Yamahata <isaku.yamahata@intel.com>
Plumb the full 64-bit error code throughout the page fault handling code
so that KVM can use the upper 32 bits, e.g. SNP's PFERR_GUEST_ENC_MASK
will be used to determine whether or not a fault is private vs. shared.
Note, passing the 64-bit error code to FNAME(walk_addr)() does NOT change
the behavior of permission_fault() when invoked in the page fault path, as
KVM explicitly clears PFERR_IMPLICIT_ACCESS in kvm_mmu_page_fault().
Continue passing '0' from the async #PF worker, as guest_memfd() and thus
private memory doesn't support async page faults.
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
[mdr: drop references/changes on rebase, update commit message]
Signed-off-by: Michael Roth <michael.roth@amd.com>
[sean: drop truncation in call to FNAME(walk_addr)(), rewrite changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/mmu/mmu.c | 3 +--
arch/x86/kvm/mmu/mmu_internal.h | 4 ++--
arch/x86/kvm/mmu/mmutrace.h | 2 +-
3 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e2fd74e06ff8..408969ac1291 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5860,8 +5860,7 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
}
if (r == RET_PF_INVALID) {
- r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
- lower_32_bits(error_code), false,
+ r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false,
&emulation_type);
if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
return -EIO;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 0eea6c5a824d..1fab1f2359b5 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -190,7 +190,7 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
struct kvm_page_fault {
/* arguments to kvm_mmu_do_page_fault. */
const gpa_t addr;
- const u32 error_code;
+ const u64 error_code;
const bool prefetch;
/* Derived from error_code. */
@@ -288,7 +288,7 @@ static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
}
static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- u32 err, bool prefetch, int *emulation_type)
+ u64 err, bool prefetch, int *emulation_type)
{
struct kvm_page_fault fault = {
.addr = cr2_or_gpa,
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index ae86820cef69..195d98bc8de8 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -260,7 +260,7 @@ TRACE_EVENT(
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(gpa_t, cr2_or_gpa)
- __field(u32, error_code)
+ __field(u64, error_code)
__field(u64 *, sptep)
__field(u64, old_spte)
__field(u64, new_spte)
--
2.44.0.278.ge034bb2e1d-goog
On 2/28/2024 10:41 AM, Sean Christopherson wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Plumb the full 64-bit error code throughout the page fault handling code
> so that KVM can use the upper 32 bits, e.g. SNP's PFERR_GUEST_ENC_MASK
> will be used to determine whether or not a fault is private vs. shared.
>
> Note, passing the 64-bit error code to FNAME(walk_addr)() does NOT change
> the behavior of permission_fault() when invoked in the page fault path, as
> KVM explicitly clears PFERR_IMPLICIT_ACCESS in kvm_mmu_page_fault().
>
> Continue passing '0' from the async #PF worker, as guest_memfd() and thus
> private memory doesn't support async page faults.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> [mdr: drop references/changes on rebase, update commit message]
> Signed-off-by: Michael Roth <michael.roth@amd.com>
> [sean: drop truncation in call to FNAME(walk_addr)(), rewrite changelog]
> Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> arch/x86/kvm/mmu/mmu.c | 3 +--
> arch/x86/kvm/mmu/mmu_internal.h | 4 ++--
> arch/x86/kvm/mmu/mmutrace.h | 2 +-
> 3 files changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e2fd74e06ff8..408969ac1291 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -5860,8 +5860,7 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
> }
>
> if (r == RET_PF_INVALID) {
> - r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
> - lower_32_bits(error_code), false,
> + r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false,
> &emulation_type);
> if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
> return -EIO;
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 0eea6c5a824d..1fab1f2359b5 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -190,7 +190,7 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
> struct kvm_page_fault {
> /* arguments to kvm_mmu_do_page_fault. */
> const gpa_t addr;
> - const u32 error_code;
> + const u64 error_code;
> const bool prefetch;
>
> /* Derived from error_code. */
> @@ -288,7 +288,7 @@ static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
> }
>
> static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> - u32 err, bool prefetch, int *emulation_type)
> + u64 err, bool prefetch, int *emulation_type)
> {
> struct kvm_page_fault fault = {
> .addr = cr2_or_gpa,
> diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
> index ae86820cef69..195d98bc8de8 100644
> --- a/arch/x86/kvm/mmu/mmutrace.h
> +++ b/arch/x86/kvm/mmu/mmutrace.h
> @@ -260,7 +260,7 @@ TRACE_EVENT(
> TP_STRUCT__entry(
> __field(int, vcpu_id)
> __field(gpa_t, cr2_or_gpa)
> - __field(u32, error_code)
> + __field(u64, error_code)
> __field(u64 *, sptep)
> __field(u64, old_spte)
> __field(u64, new_spte)
On 2/27/24 18:41, Sean Christopherson wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Plumb the full 64-bit error code throughout the page fault handling code
> so that KVM can use the upper 32 bits, e.g. SNP's PFERR_GUEST_ENC_MASK
> will be used to determine whether or not a fault is private vs. shared.
>
> Note, passing the 64-bit error code to FNAME(walk_addr)() does NOT change
> the behavior of permission_fault() when invoked in the page fault path, as
> KVM explicitly clears PFERR_IMPLICIT_ACCESS in kvm_mmu_page_fault().
May this lead to a WARN_ON_ONCE?
5843 int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
u64 error_code,
5844 void *insn, int insn_len)
5845 {
... ...
5856 */
5857 if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
5858 error_code &= ~PFERR_IMPLICIT_ACCESS;
>
> Continue passing '0' from the async #PF worker, as guest_memfd() and thus
:s/guest_memfd()/guest_memfd/ ?
Dongli Zhang
> private memory doesn't support async page faults.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> [mdr: drop references/changes on rebase, update commit message]
> Signed-off-by: Michael Roth <michael.roth@amd.com>
> [sean: drop truncation in call to FNAME(walk_addr)(), rewrite changelog]
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/x86/kvm/mmu/mmu.c | 3 +--
> arch/x86/kvm/mmu/mmu_internal.h | 4 ++--
> arch/x86/kvm/mmu/mmutrace.h | 2 +-
> 3 files changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e2fd74e06ff8..408969ac1291 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -5860,8 +5860,7 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
> }
>
> if (r == RET_PF_INVALID) {
> - r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
> - lower_32_bits(error_code), false,
> + r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false,
> &emulation_type);
> if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
> return -EIO;
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 0eea6c5a824d..1fab1f2359b5 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -190,7 +190,7 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
> struct kvm_page_fault {
> /* arguments to kvm_mmu_do_page_fault. */
> const gpa_t addr;
> - const u32 error_code;
> + const u64 error_code;
> const bool prefetch;
>
> /* Derived from error_code. */
> @@ -288,7 +288,7 @@ static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
> }
>
> static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> - u32 err, bool prefetch, int *emulation_type)
> + u64 err, bool prefetch, int *emulation_type)
> {
> struct kvm_page_fault fault = {
> .addr = cr2_or_gpa,
> diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
> index ae86820cef69..195d98bc8de8 100644
> --- a/arch/x86/kvm/mmu/mmutrace.h
> +++ b/arch/x86/kvm/mmu/mmutrace.h
> @@ -260,7 +260,7 @@ TRACE_EVENT(
> TP_STRUCT__entry(
> __field(int, vcpu_id)
> __field(gpa_t, cr2_or_gpa)
> - __field(u32, error_code)
> + __field(u64, error_code)
> __field(u64 *, sptep)
> __field(u64, old_spte)
> __field(u64, new_spte)
On Tue, Feb 27, 2024, Dongli Zhang wrote:
>
>
> On 2/27/24 18:41, Sean Christopherson wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> >
> > Plumb the full 64-bit error code throughout the page fault handling code
> > so that KVM can use the upper 32 bits, e.g. SNP's PFERR_GUEST_ENC_MASK
> > will be used to determine whether or not a fault is private vs. shared.
> >
> > Note, passing the 64-bit error code to FNAME(walk_addr)() does NOT change
> > the behavior of permission_fault() when invoked in the page fault path, as
> > KVM explicitly clears PFERR_IMPLICIT_ACCESS in kvm_mmu_page_fault().
>
> May this lead to a WARN_ON_ONCE?
>
> 5843 int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> u64 error_code,
> 5844 void *insn, int insn_len)
> 5845 {
> ... ...
> 5856 */
> 5857 if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
> 5858 error_code &= ~PFERR_IMPLICIT_ACCESS;
Nope, it shouldn't. PFERR_IMPLICIT_ACCESS is a synthetic, KVM-defined flag, and
should never be in the error code passed to kvm_mmu_page_fault(). If the WARN
fires, it means hardware (specifically, AMD CPUs for #NPF) has started using the
bit for something, and that we need to update KVM to use a different bit.
> > Continue passing '0' from the async #PF worker, as guest_memfd() and thus
>
> :s/guest_memfd()/guest_memfd/ ?
I've been styling it as guest_memfd() to make it look like a syscall, e.g. like
memfd_create(), when I'm talking about a file that was created by userspace, as
opposed to GUEST_MEMFD when I'm talking about the ioctl() itself.
On 2/28/24 08:22, Sean Christopherson wrote:
> On Tue, Feb 27, 2024, Dongli Zhang wrote:
>>
>>
>> On 2/27/24 18:41, Sean Christopherson wrote:
>>> From: Isaku Yamahata <isaku.yamahata@intel.com>
>>>
>>> Plumb the full 64-bit error code throughout the page fault handling code
>>> so that KVM can use the upper 32 bits, e.g. SNP's PFERR_GUEST_ENC_MASK
>>> will be used to determine whether or not a fault is private vs. shared.
>>>
>>> Note, passing the 64-bit error code to FNAME(walk_addr)() does NOT change
>>> the behavior of permission_fault() when invoked in the page fault path, as
>>> KVM explicitly clears PFERR_IMPLICIT_ACCESS in kvm_mmu_page_fault().
>>
>> May this lead to a WARN_ON_ONCE?
>>
>> 5843 int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>> u64 error_code,
>> 5844 void *insn, int insn_len)
>> 5845 {
>> ... ...
>> 5856 */
>> 5857 if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
>> 5858 error_code &= ~PFERR_IMPLICIT_ACCESS;
>
> Nope, it shouldn't. PFERR_IMPLICIT_ACCESS is a synthetic, KVM-defined flag, and
> should never be in the error code passed to kvm_mmu_page_fault(). If the WARN
> fires, it means hardware (specifically, AMD CPUs for #NPF) has started using the
> bit for something, and that we need to update KVM to use a different bit.
Thank you very much for the explanation.
I see it is impossible to have PFERR_IMPLICIT_ACCESS set here, unless there is
AMD hardware issue or Intel page fault handler morphs the error_code erroneously.
I meant the above commit message confused me when I was reading it. E.g., how
about something like:
"Note, passing the 64-bit error code to FNAME(walk_addr)() does NOT change
the behavior of permission_fault() when invoked in the page fault path, as it
should never be in the error code because ...."
Thank you very much!
Dongli Zhang
>
>>> Continue passing '0' from the async #PF worker, as guest_memfd() and thus
>>
>> :s/guest_memfd()/guest_memfd/ ?
>
> I've been styling it as guest_memfd() to make it look like a syscall, e.g. like
> memfd_create(), when I'm talking about a file that was created by userspace, as
> opposed to GUEST_MEMFD when I'm talking about the ioctl() itself.
© 2016 - 2026 Red Hat, Inc.