It is possible to correctly do aging without taking the KVM MMU lock;
this option allows such architectures to do so. Architectures that
select CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS are responsible for
correctness.
Suggested-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
---
include/linux/kvm_host.h | 1 +
virt/kvm/Kconfig | 2 ++
virt/kvm/kvm_main.c | 24 +++++++++++++++++-------
3 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f34f4cfaa513..c28a6aa1f2ed 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -267,6 +267,7 @@ struct kvm_gfn_range {
union kvm_mmu_notifier_arg arg;
enum kvm_gfn_range_filter attr_filter;
bool may_block;
+ bool lockless;
};
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 54e959e7d68f..9356f4e4e255 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -102,6 +102,8 @@ config KVM_GENERIC_MMU_NOTIFIER
config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
depends on KVM_GENERIC_MMU_NOTIFIER
+
+config KVM_MMU_NOTIFIER_AGING_LOCKLESS
bool
config KVM_GENERIC_MEMORY_ATTRIBUTES
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1bd49770506a..4734ae9e8a54 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -517,6 +517,7 @@ struct kvm_mmu_notifier_range {
on_lock_fn_t on_lock;
bool flush_on_ret;
bool may_block;
+ bool lockless;
};
/*
@@ -571,6 +572,10 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
IS_KVM_NULL_FN(range->handler)))
return r;
+ /* on_lock will never be called for lockless walks */
+ if (WARN_ON_ONCE(range->lockless && !IS_KVM_NULL_FN(range->on_lock)))
+ return r;
+
idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
@@ -607,15 +612,18 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
gfn_range.start = hva_to_gfn_memslot(hva_start, slot);
gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
gfn_range.slot = slot;
+ gfn_range.lockless = range->lockless;
if (!r.found_memslot) {
r.found_memslot = true;
- KVM_MMU_LOCK(kvm);
- if (!IS_KVM_NULL_FN(range->on_lock))
- range->on_lock(kvm);
-
- if (IS_KVM_NULL_FN(range->handler))
- goto mmu_unlock;
+ if (!range->lockless) {
+ KVM_MMU_LOCK(kvm);
+ if (!IS_KVM_NULL_FN(range->on_lock))
+ range->on_lock(kvm);
+
+ if (IS_KVM_NULL_FN(range->handler))
+ goto mmu_unlock;
+ }
}
r.ret |= range->handler(kvm, &gfn_range);
}
@@ -625,7 +633,7 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
kvm_flush_remote_tlbs(kvm);
mmu_unlock:
- if (r.found_memslot)
+ if (r.found_memslot && !range->lockless)
KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
@@ -647,6 +655,8 @@ static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
.on_lock = (void *)kvm_null_fn,
.flush_on_ret = flush_on_ret,
.may_block = false,
+ .lockless =
+ IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS),
};
return kvm_handle_hva_range(kvm, &range).ret;
--
2.48.1.362.g079036d154-goog
It's not a lockless walk of the memslots. The walk of memslots is already
"lockless" in that the memslots are protected by SRCU, not by mmu_lock.
On Tue, Feb 04, 2025, James Houghton wrote:
> It is possible to correctly do aging without taking the KVM MMU lock;
> this option allows such architectures to do so. Architectures that
> select CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS are responsible for
> correctness.
>
> Suggested-by: Yu Zhao <yuzhao@google.com>
> Signed-off-by: James Houghton <jthoughton@google.com>
> Reviewed-by: David Matlack <dmatlack@google.com>
> ---
> include/linux/kvm_host.h | 1 +
> virt/kvm/Kconfig | 2 ++
> virt/kvm/kvm_main.c | 24 +++++++++++++++++-------
> 3 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f34f4cfaa513..c28a6aa1f2ed 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -267,6 +267,7 @@ struct kvm_gfn_range {
> union kvm_mmu_notifier_arg arg;
> enum kvm_gfn_range_filter attr_filter;
> bool may_block;
> + bool lockless;
> };
> bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
> bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 54e959e7d68f..9356f4e4e255 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -102,6 +102,8 @@ config KVM_GENERIC_MMU_NOTIFIER
>
> config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
> depends on KVM_GENERIC_MMU_NOTIFIER
> +
> +config KVM_MMU_NOTIFIER_AGING_LOCKLESS
> bool
As noted by Stephen[*], this steals the "bool" from KVM_ELIDE_TLB_FLUSH_IF_YOUNG.
Looking at it with fresh eyes, it also fails to take a dependency on
KVM_GENERIC_MMU_NOTIFIER.
Lastly, the name is unnecessarily long. The "NOTIFIER" part is superfluous and
can be dropped, as it's a property of the architecture's MMU, not of KVM's
mmu_notifier implementation. E.g. if KVM ever did aging outside of the notifier,
then this Kconfig would be relevant for that flow as well. The dependency on
KVM_GENERIC_MMU_NOTIFIER is what communicates that it's currently used only by
mmu_notifier aging.
Actually, I take "Lastly" back. IMO, it reads much better as LOCKLESS_AGING,
because LOCKLESS is an adverb that describes the AGING process.
[*] https://lore.kernel.org/all/20250214181401.4e7dd91d@canb.auug.org.au
TL;DR: I'm squashing this:
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index f0a60e59c884..fe8ea8c097de 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,7 +22,7 @@ config KVM_X86
select KVM_COMMON
select KVM_GENERIC_MMU_NOTIFIER
select KVM_ELIDE_TLB_FLUSH_IF_YOUNG
- select KVM_MMU_NOTIFIER_AGING_LOCKLESS
+ select KVM_MMU_LOCKLESS_AGING
select HAVE_KVM_IRQCHIP
select HAVE_KVM_PFNCACHE
select HAVE_KVM_DIRTY_RING_TSO
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 9356f4e4e255..746e1f466aa6 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -102,8 +102,10 @@ config KVM_GENERIC_MMU_NOTIFIER
config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
depends on KVM_GENERIC_MMU_NOTIFIER
+ bool
-config KVM_MMU_NOTIFIER_AGING_LOCKLESS
+config KVM_MMU_LOCKLESS_AGING
+ depends on KVM_GENERIC_MMU_NOTIFIER
bool
config KVM_GENERIC_MEMORY_ATTRIBUTES
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e514e3db1b31..201c14ff476f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -655,8 +655,7 @@ static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
.on_lock = (void *)kvm_null_fn,
.flush_on_ret = flush_on_ret,
.may_block = false,
- .lockless =
- IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS),
+ .lockless = IS_ENABLED(CONFIG_KVM_MMU_LOCKLESS_AGING),
};
return kvm_handle_hva_range(kvm, &range).ret;
On Fri, Feb 14, 2025 at 7:27 AM Sean Christopherson <seanjc@google.com> wrote:
>
> It's not a lockless walk of the memslots. The walk of memslots is already
> "lockless" in that the memslots are protected by SRCU, not by mmu_lock.
Indeed, so I guess I should have said something like "Allow memslot
walk callbacks to be lockless".
>
> On Tue, Feb 04, 2025, James Houghton wrote:
> > It is possible to correctly do aging without taking the KVM MMU lock;
> > this option allows such architectures to do so. Architectures that
> > select CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS are responsible for
> > correctness.
> >
> > Suggested-by: Yu Zhao <yuzhao@google.com>
> > Signed-off-by: James Houghton <jthoughton@google.com>
> > Reviewed-by: David Matlack <dmatlack@google.com>
> > ---
> > include/linux/kvm_host.h | 1 +
> > virt/kvm/Kconfig | 2 ++
> > virt/kvm/kvm_main.c | 24 +++++++++++++++++-------
> > 3 files changed, 20 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index f34f4cfaa513..c28a6aa1f2ed 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -267,6 +267,7 @@ struct kvm_gfn_range {
> > union kvm_mmu_notifier_arg arg;
> > enum kvm_gfn_range_filter attr_filter;
> > bool may_block;
> > + bool lockless;
> > };
> > bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
> > bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> > index 54e959e7d68f..9356f4e4e255 100644
> > --- a/virt/kvm/Kconfig
> > +++ b/virt/kvm/Kconfig
> > @@ -102,6 +102,8 @@ config KVM_GENERIC_MMU_NOTIFIER
> >
> > config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
> > depends on KVM_GENERIC_MMU_NOTIFIER
> > +
> > +config KVM_MMU_NOTIFIER_AGING_LOCKLESS
> > bool
>
> As noted by Stephen[*], this steals the "bool" from KVM_ELIDE_TLB_FLUSH_IF_YOUNG.
Ah sorry!
> Looking at it with fresh eyes, it also fails to take a dependency on
> KVM_GENERIC_MMU_NOTIFIER.
Indeed, thanks.
> Lastly, the name is unnecessarily long. The "NOTIFIER" part is superfluous and
> can be dropped, as it's a property of the architecture's MMU, not of KVM's
> mmu_notifier implementation. E.g. if KVM ever did aging outside of the notifier,
> then this Kconfig would be relevant for that flow as well. The dependency on
> KVM_GENERIC_MMU_NOTIFIER is what communicates that it's currently used only by
> mmu_notifier aging.
>
> Actually, I take "Lastly" back. IMO, it reads much better as LOCKLESS_AGING,
> because LOCKLESS is an adverb that describes the AGING process.
>
> [*] https://lore.kernel.org/all/20250214181401.4e7dd91d@canb.auug.org.au
>
> TL;DR: I'm squashing this:
>
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index f0a60e59c884..fe8ea8c097de 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -22,7 +22,7 @@ config KVM_X86
> select KVM_COMMON
> select KVM_GENERIC_MMU_NOTIFIER
> select KVM_ELIDE_TLB_FLUSH_IF_YOUNG
> - select KVM_MMU_NOTIFIER_AGING_LOCKLESS
> + select KVM_MMU_LOCKLESS_AGING
> select HAVE_KVM_IRQCHIP
> select HAVE_KVM_PFNCACHE
> select HAVE_KVM_DIRTY_RING_TSO
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 9356f4e4e255..746e1f466aa6 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -102,8 +102,10 @@ config KVM_GENERIC_MMU_NOTIFIER
>
> config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
> depends on KVM_GENERIC_MMU_NOTIFIER
> + bool
>
> -config KVM_MMU_NOTIFIER_AGING_LOCKLESS
> +config KVM_MMU_LOCKLESS_AGING
> + depends on KVM_GENERIC_MMU_NOTIFIER
> bool
>
> config KVM_GENERIC_MEMORY_ATTRIBUTES
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index e514e3db1b31..201c14ff476f 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -655,8 +655,7 @@ static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
> .on_lock = (void *)kvm_null_fn,
> .flush_on_ret = flush_on_ret,
> .may_block = false,
> - .lockless =
> - IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS),
> + .lockless = IS_ENABLED(CONFIG_KVM_MMU_LOCKLESS_AGING),
> };
>
> return kvm_handle_hva_range(kvm, &range).ret;
LGTM, thanks! You will also need to do this same rename in patch 4[1].
[1]: https://lore.kernel.org/kvm/20250204004038.1680123-5-jthoughton@google.com/
© 2016 - 2025 Red Hat, Inc.