[PATCH v9 02/11] KVM: Add lockless memslot walk to KVM

James Houghton posted 11 patches 10 months, 1 week ago
[PATCH v9 02/11] KVM: Add lockless memslot walk to KVM
Posted by James Houghton 10 months, 1 week ago
It is possible to correctly do aging without taking the KVM MMU lock;
this option allows such architectures to do so. Architectures that
select CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS are responsible for
correctness.

Suggested-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/Kconfig         |  2 ++
 virt/kvm/kvm_main.c      | 24 +++++++++++++++++-------
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f34f4cfaa513..c28a6aa1f2ed 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -267,6 +267,7 @@ struct kvm_gfn_range {
 	union kvm_mmu_notifier_arg arg;
 	enum kvm_gfn_range_filter attr_filter;
 	bool may_block;
+	bool lockless;
 };
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 54e959e7d68f..9356f4e4e255 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -102,6 +102,8 @@ config KVM_GENERIC_MMU_NOTIFIER
 
 config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
        depends on KVM_GENERIC_MMU_NOTIFIER
+
+config KVM_MMU_NOTIFIER_AGING_LOCKLESS
        bool
 
 config KVM_GENERIC_MEMORY_ATTRIBUTES
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1bd49770506a..4734ae9e8a54 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -517,6 +517,7 @@ struct kvm_mmu_notifier_range {
 	on_lock_fn_t on_lock;
 	bool flush_on_ret;
 	bool may_block;
+	bool lockless;
 };
 
 /*
@@ -571,6 +572,10 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
 			 IS_KVM_NULL_FN(range->handler)))
 		return r;
 
+	/* on_lock will never be called for lockless walks */
+	if (WARN_ON_ONCE(range->lockless && !IS_KVM_NULL_FN(range->on_lock)))
+		return r;
+
 	idx = srcu_read_lock(&kvm->srcu);
 
 	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
@@ -607,15 +612,18 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
 			gfn_range.start = hva_to_gfn_memslot(hva_start, slot);
 			gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
 			gfn_range.slot = slot;
+			gfn_range.lockless = range->lockless;
 
 			if (!r.found_memslot) {
 				r.found_memslot = true;
-				KVM_MMU_LOCK(kvm);
-				if (!IS_KVM_NULL_FN(range->on_lock))
-					range->on_lock(kvm);
-
-				if (IS_KVM_NULL_FN(range->handler))
-					goto mmu_unlock;
+				if (!range->lockless) {
+					KVM_MMU_LOCK(kvm);
+					if (!IS_KVM_NULL_FN(range->on_lock))
+						range->on_lock(kvm);
+
+					if (IS_KVM_NULL_FN(range->handler))
+						goto mmu_unlock;
+				}
 			}
 			r.ret |= range->handler(kvm, &gfn_range);
 		}
@@ -625,7 +633,7 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
 		kvm_flush_remote_tlbs(kvm);
 
 mmu_unlock:
-	if (r.found_memslot)
+	if (r.found_memslot && !range->lockless)
 		KVM_MMU_UNLOCK(kvm);
 
 	srcu_read_unlock(&kvm->srcu, idx);
@@ -647,6 +655,8 @@ static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
 		.on_lock	= (void *)kvm_null_fn,
 		.flush_on_ret	= flush_on_ret,
 		.may_block	= false,
+		.lockless	=
+			IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS),
 	};
 
 	return kvm_handle_hva_range(kvm, &range).ret;
-- 
2.48.1.362.g079036d154-goog
Re: [PATCH v9 02/11] KVM: Add lockless memslot walk to KVM
Posted by Sean Christopherson 10 months ago
It's not a lockless walk of the memslots.  The walk of memslots is already
"lockless" in that the memslots are protected by SRCU, not by mmu_lock.

On Tue, Feb 04, 2025, James Houghton wrote:
> It is possible to correctly do aging without taking the KVM MMU lock;
> this option allows such architectures to do so. Architectures that
> select CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS are responsible for
> correctness.
> 
> Suggested-by: Yu Zhao <yuzhao@google.com>
> Signed-off-by: James Houghton <jthoughton@google.com>
> Reviewed-by: David Matlack <dmatlack@google.com>
> ---
>  include/linux/kvm_host.h |  1 +
>  virt/kvm/Kconfig         |  2 ++
>  virt/kvm/kvm_main.c      | 24 +++++++++++++++++-------
>  3 files changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f34f4cfaa513..c28a6aa1f2ed 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -267,6 +267,7 @@ struct kvm_gfn_range {
>  	union kvm_mmu_notifier_arg arg;
>  	enum kvm_gfn_range_filter attr_filter;
>  	bool may_block;
> +	bool lockless;
>  };
>  bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
>  bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 54e959e7d68f..9356f4e4e255 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -102,6 +102,8 @@ config KVM_GENERIC_MMU_NOTIFIER
>  
>  config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
>         depends on KVM_GENERIC_MMU_NOTIFIER
> +
> +config KVM_MMU_NOTIFIER_AGING_LOCKLESS
>         bool

As noted by Stephen[*], this steals the "bool" from KVM_ELIDE_TLB_FLUSH_IF_YOUNG.

Looking at it with fresh eyes, it also fails to take a dependency on
KVM_GENERIC_MMU_NOTIFIER.

Lastly, the name is unnecessarily long.  The "NOTIFIER" part is superfluous and
can be dropped, as it's a property of the architecture's MMU, not of KVM's
mmu_notifier implementation. E.g. if KVM ever did aging outside of the notifier,
then this Kconfig would be relevant for that flow as well.  The dependency on
KVM_GENERIC_MMU_NOTIFIER is what communicates that it's currently used only by
mmu_notifier aging.

Actually, I take "Lastly" back.  IMO, it reads much better as LOCKLESS_AGING,
because LOCKLESS is an adverb that describes the AGING process.

[*] https://lore.kernel.org/all/20250214181401.4e7dd91d@canb.auug.org.au

TL;DR: I'm squashing this:

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index f0a60e59c884..fe8ea8c097de 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,7 +22,7 @@ config KVM_X86
        select KVM_COMMON
        select KVM_GENERIC_MMU_NOTIFIER
        select KVM_ELIDE_TLB_FLUSH_IF_YOUNG
-       select KVM_MMU_NOTIFIER_AGING_LOCKLESS
+       select KVM_MMU_LOCKLESS_AGING
        select HAVE_KVM_IRQCHIP
        select HAVE_KVM_PFNCACHE
        select HAVE_KVM_DIRTY_RING_TSO
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 9356f4e4e255..746e1f466aa6 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -102,8 +102,10 @@ config KVM_GENERIC_MMU_NOTIFIER
 
 config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
        depends on KVM_GENERIC_MMU_NOTIFIER
+       bool
 
-config KVM_MMU_NOTIFIER_AGING_LOCKLESS
+config KVM_MMU_LOCKLESS_AGING
+       depends on KVM_GENERIC_MMU_NOTIFIER
        bool
 
 config KVM_GENERIC_MEMORY_ATTRIBUTES
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e514e3db1b31..201c14ff476f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -655,8 +655,7 @@ static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
                .on_lock        = (void *)kvm_null_fn,
                .flush_on_ret   = flush_on_ret,
                .may_block      = false,
-               .lockless       =
-                       IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS),
+               .lockless       = IS_ENABLED(CONFIG_KVM_MMU_LOCKLESS_AGING),
        };
 
        return kvm_handle_hva_range(kvm, &range).ret;
Re: [PATCH v9 02/11] KVM: Add lockless memslot walk to KVM
Posted by James Houghton 10 months ago
On Fri, Feb 14, 2025 at 7:27 AM Sean Christopherson <seanjc@google.com> wrote:
>
> It's not a lockless walk of the memslots.  The walk of memslots is already
> "lockless" in that the memslots are protected by SRCU, not by mmu_lock.

Indeed, so I guess I should have said something like "Allow memslot
walk callbacks to be lockless".

>
> On Tue, Feb 04, 2025, James Houghton wrote:
> > It is possible to correctly do aging without taking the KVM MMU lock;
> > this option allows such architectures to do so. Architectures that
> > select CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS are responsible for
> > correctness.
> >
> > Suggested-by: Yu Zhao <yuzhao@google.com>
> > Signed-off-by: James Houghton <jthoughton@google.com>
> > Reviewed-by: David Matlack <dmatlack@google.com>
> > ---
> >  include/linux/kvm_host.h |  1 +
> >  virt/kvm/Kconfig         |  2 ++
> >  virt/kvm/kvm_main.c      | 24 +++++++++++++++++-------
> >  3 files changed, 20 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index f34f4cfaa513..c28a6aa1f2ed 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -267,6 +267,7 @@ struct kvm_gfn_range {
> >       union kvm_mmu_notifier_arg arg;
> >       enum kvm_gfn_range_filter attr_filter;
> >       bool may_block;
> > +     bool lockless;
> >  };
> >  bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
> >  bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> > index 54e959e7d68f..9356f4e4e255 100644
> > --- a/virt/kvm/Kconfig
> > +++ b/virt/kvm/Kconfig
> > @@ -102,6 +102,8 @@ config KVM_GENERIC_MMU_NOTIFIER
> >
> >  config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
> >         depends on KVM_GENERIC_MMU_NOTIFIER
> > +
> > +config KVM_MMU_NOTIFIER_AGING_LOCKLESS
> >         bool
>
> As noted by Stephen[*], this steals the "bool" from KVM_ELIDE_TLB_FLUSH_IF_YOUNG.

Ah sorry!

> Looking at it with fresh eyes, it also fails to take a dependency on
> KVM_GENERIC_MMU_NOTIFIER.

Indeed, thanks.

> Lastly, the name is unnecessarily long.  The "NOTIFIER" part is superfluous and
> can be dropped, as it's a property of the architecture's MMU, not of KVM's
> mmu_notifier implementation. E.g. if KVM ever did aging outside of the notifier,
> then this Kconfig would be relevant for that flow as well.  The dependency on
> KVM_GENERIC_MMU_NOTIFIER is what communicates that it's currently used only by
> mmu_notifier aging.
>
> Actually, I take "Lastly" back.  IMO, it reads much better as LOCKLESS_AGING,
> because LOCKLESS is an adverb that describes the AGING process.
>
> [*] https://lore.kernel.org/all/20250214181401.4e7dd91d@canb.auug.org.au
>
> TL;DR: I'm squashing this:
>
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index f0a60e59c884..fe8ea8c097de 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -22,7 +22,7 @@ config KVM_X86
>         select KVM_COMMON
>         select KVM_GENERIC_MMU_NOTIFIER
>         select KVM_ELIDE_TLB_FLUSH_IF_YOUNG
> -       select KVM_MMU_NOTIFIER_AGING_LOCKLESS
> +       select KVM_MMU_LOCKLESS_AGING
>         select HAVE_KVM_IRQCHIP
>         select HAVE_KVM_PFNCACHE
>         select HAVE_KVM_DIRTY_RING_TSO
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 9356f4e4e255..746e1f466aa6 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -102,8 +102,10 @@ config KVM_GENERIC_MMU_NOTIFIER
>
>  config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
>         depends on KVM_GENERIC_MMU_NOTIFIER
> +       bool
>
> -config KVM_MMU_NOTIFIER_AGING_LOCKLESS
> +config KVM_MMU_LOCKLESS_AGING
> +       depends on KVM_GENERIC_MMU_NOTIFIER
>         bool
>
>  config KVM_GENERIC_MEMORY_ATTRIBUTES
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index e514e3db1b31..201c14ff476f 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -655,8 +655,7 @@ static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
>                 .on_lock        = (void *)kvm_null_fn,
>                 .flush_on_ret   = flush_on_ret,
>                 .may_block      = false,
> -               .lockless       =
> -                       IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_AGING_LOCKLESS),
> +               .lockless       = IS_ENABLED(CONFIG_KVM_MMU_LOCKLESS_AGING),
>         };
>
>         return kvm_handle_hva_range(kvm, &range).ret;

LGTM, thanks! You will also need to do this same rename in patch 4[1].

[1]: https://lore.kernel.org/kvm/20250204004038.1680123-5-jthoughton@google.com/