spte_has_volatile_bits() is now a misnomer, as an SPTE can have its
Accessed bit set or cleared without the mmu_lock held, but the state of
the Accessed bit is not checked in spte_has_volatile_bits().
Even if a caller uses spte_needs_atomic_write(), Accessed bit
information may still be lost, but that is already tolerated, as the TLB
is not invalidated after the Accessed bit is cleared.
Signed-off-by: James Houghton <jthoughton@google.com>
---
 Documentation/virt/kvm/locking.rst | 4 ++--
 arch/x86/kvm/mmu/mmu.c             | 4 ++--
 arch/x86/kvm/mmu/spte.c            | 9 +++++----
 arch/x86/kvm/mmu/spte.h            | 2 +-
 arch/x86/kvm/mmu/tdp_iter.h        | 2 +-
 5 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index c56d5f26c750..4720053c70a3 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -196,7 +196,7 @@ writable between reading spte and updating spte. Like below case:
The Dirty bit is lost in this case.
In order to avoid this kind of issue, we always treat the spte as "volatile"
-if it can be updated out of mmu-lock [see spte_has_volatile_bits()]; it means
+if it can be updated out of mmu-lock [see spte_needs_atomic_write()]; it means
the spte is always atomically updated in this case.
3) flush tlbs due to spte updated
@@ -212,7 +212,7 @@ function to update spte (present -> present).
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
atomically update the spte and the race caused by fast page fault can be avoided.
-See the comments in spte_has_volatile_bits() and mmu_spte_update().
+See the comments in spte_needs_atomic_write() and mmu_spte_update().
Lockless Access Tracking:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7779b49f386d..1fa0f47eb6a5 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -501,7 +501,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
return false;
}
- if (!spte_has_volatile_bits(old_spte))
+ if (!spte_needs_atomic_write(old_spte))
__update_clear_spte_fast(sptep, new_spte);
else
old_spte = __update_clear_spte_slow(sptep, new_spte);
@@ -524,7 +524,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
int level = sptep_to_sp(sptep)->role.level;
if (!is_shadow_present_pte(old_spte) ||
- !spte_has_volatile_bits(old_spte))
+ !spte_needs_atomic_write(old_spte))
__update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE);
else
old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE);
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index e984b440c0f0..ae2017cc1239 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -129,11 +129,12 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
}
/*
- * Returns true if the SPTE has bits that may be set without holding mmu_lock.
- * The caller is responsible for checking if the SPTE is shadow-present, and
- * for determining whether or not the caller cares about non-leaf SPTEs.
+ * Returns true if the SPTE has bits other than the Accessed bit that may be
+ * changed without holding mmu_lock. The caller is responsible for checking if
+ * the SPTE is shadow-present, and for determining whether or not the caller
+ * cares about non-leaf SPTEs.
*/
-bool spte_has_volatile_bits(u64 spte)
+bool spte_needs_atomic_write(u64 spte)
{
if (!is_writable_pte(spte) && is_mmu_writable_spte(spte))
return true;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 59746854c0af..4c290ae9a02a 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -519,7 +519,7 @@ static inline u64 get_mmio_spte_generation(u64 spte)
return gen;
}
-bool spte_has_volatile_bits(u64 spte);
+bool spte_needs_atomic_write(u64 spte);
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
const struct kvm_memory_slot *slot,
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index 05e9d678aac9..b54123163efc 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -55,7 +55,7 @@ static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level)
{
return is_shadow_present_pte(old_spte) &&
is_last_spte(old_spte, level) &&
- spte_has_volatile_bits(old_spte);
+ spte_needs_atomic_write(old_spte);
}
static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
--
2.48.1.362.g079036d154-goog
On Tue, Feb 04, 2025, James Houghton wrote:
> spte_has_volatile_bits() is now a misnomer, as an SPTE can have its
> Accessed bit set or cleared without the mmu_lock held, but the state of
> the Accessed bit is not checked in spte_has_volatile_bits().
> Even if a caller uses spte_needs_atomic_write(), Accessed bit
> information may still be lost, but that is already tolerated, as the TLB
> is not invalidated after the Accessed bit is cleared.
>
> Signed-off-by: James Houghton <jthoughton@google.com>
> ---
...
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 59746854c0af..4c290ae9a02a 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -519,7 +519,7 @@ static inline u64 get_mmio_spte_generation(u64 spte)
> return gen;
> }
>
> -bool spte_has_volatile_bits(u64 spte);
> +bool spte_needs_atomic_write(u64 spte);
>
> bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> const struct kvm_memory_slot *slot,
> diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
> index 05e9d678aac9..b54123163efc 100644
> --- a/arch/x86/kvm/mmu/tdp_iter.h
> +++ b/arch/x86/kvm/mmu/tdp_iter.h
> @@ -55,7 +55,7 @@ static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level)
> {
> return is_shadow_present_pte(old_spte) &&
> is_last_spte(old_spte, level) &&
> - spte_has_volatile_bits(old_spte);
> + spte_needs_atomic_write(old_spte);
Unless you object, I'll change this to spte_needs_atomic_update(), and tweak
kvm_tdp_mmu_spte_need_atomic_write() accordingly. "write" was a bad choice by
me. It's not just the store/write that needs to be atomic, it's the entire
read-modify-write. E.g. KVM needs to preserve the existing value, but for many
flows, it's even more important that KVM's snapshot of the old SPTE is accurate.
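As a standalone sketch of that distinction (illustrative only, not KVM's actual
helpers; the names below are made up), the atomic path returns what was
*actually* in the SPTE, while the non-atomic path can only hand back the
caller's possibly-stale snapshot:

#include <stdatomic.h>
#include <stdint.h>

/*
 * Illustrative only -- hardware can set bits (e.g. Dirty) in a live SPTE at
 * any time, so the full read-modify-write must be atomic for KVM's snapshot
 * of the old value to be trustworthy.
 */
static uint64_t spte_update_atomic(_Atomic uint64_t *sptep, uint64_t new_spte)
{
	/* Returns an accurate snapshot of the old SPTE. */
	return atomic_exchange(sptep, new_spte);
}

static uint64_t spte_update_nonatomic(_Atomic uint64_t *sptep,
				      uint64_t old_spte, uint64_t new_spte)
{
	/*
	 * Any bit the CPU set after old_spte was read is overwritten and
	 * never observed; only tolerable when losing it is harmless (e.g.
	 * the Accessed bit) or when no concurrent update can occur.
	 */
	atomic_store(sptep, new_spte);
	return old_spte;
}

The accurate snapshot is what lets callers reason about the old SPTE (TLB
flushes, dirty logging, etc.) for bits that mmu_lock does not protect.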
> }
>
> static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
> --
> 2.48.1.362.g079036d154-goog
>
On Wed, Feb 12, 2025 at 2:09 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Tue, Feb 04, 2025, James Houghton wrote:
> > spte_has_volatile_bits() is now a misnomer, as an SPTE can have its
> > Accessed bit set or cleared without the mmu_lock held, but the state of
> > the Accessed bit is not checked in spte_has_volatile_bits().
> > Even if a caller uses spte_needs_atomic_write(), Accessed bit
> > information may still be lost, but that is already tolerated, as the TLB
> > is not invalidated after the Accessed bit is cleared.
> >
> > Signed-off-by: James Houghton <jthoughton@google.com>
> > ---
>
> ...
>
> > diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> > index 59746854c0af..4c290ae9a02a 100644
> > --- a/arch/x86/kvm/mmu/spte.h
> > +++ b/arch/x86/kvm/mmu/spte.h
> > @@ -519,7 +519,7 @@ static inline u64 get_mmio_spte_generation(u64 spte)
> > return gen;
> > }
> >
> > -bool spte_has_volatile_bits(u64 spte);
> > +bool spte_needs_atomic_write(u64 spte);
> >
> > bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> > const struct kvm_memory_slot *slot,
> > diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
> > index 05e9d678aac9..b54123163efc 100644
> > --- a/arch/x86/kvm/mmu/tdp_iter.h
> > +++ b/arch/x86/kvm/mmu/tdp_iter.h
> > @@ -55,7 +55,7 @@ static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level)
> > {
> > return is_shadow_present_pte(old_spte) &&
> > is_last_spte(old_spte, level) &&
> > - spte_has_volatile_bits(old_spte);
> > + spte_needs_atomic_write(old_spte);
>
> Unless you object, I'll change this to spte_needs_atomic_update(), and tweak
> kvm_tdp_mmu_spte_need_atomic_write() accordingly. "write" was a bad choice by
> me. It's not just the store/write that needs to be atomic, it's the entire
> read-modify-write. E.g. KVM needs to preserve the existing value, but for many
> flows, it's even more important that KVM's snapshot of the old SPTE is accurate.
No objections, please make that change. Thanks!