Provide flexibility to the architecture to synchronize as optimally as
they can instead of always taking the MMU lock for writing.
Architectures that do their own locking must select
CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS.
The immediate application is to allow architectures to implement the
test/clear_young MMU notifiers more cheaply.
Suggested-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
---
include/linux/kvm_host.h | 1 +
virt/kvm/Kconfig | 2 ++
virt/kvm/kvm_main.c | 28 +++++++++++++++++++++-------
3 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 18a1672ffcbf..ab0318dbb8bd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -260,6 +260,7 @@ struct kvm_gfn_range {
gfn_t end;
union kvm_mmu_notifier_arg arg;
bool may_block;
+ bool lockless;
};
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 54e959e7d68f..b50e4e629ac9 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -102,6 +102,8 @@ config KVM_GENERIC_MMU_NOTIFIER
config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
depends on KVM_GENERIC_MMU_NOTIFIER
+
+config KVM_MMU_NOTIFIER_YOUNG_LOCKLESS
bool
config KVM_GENERIC_MEMORY_ATTRIBUTES
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b234a9acdb3..218edf037917 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -517,6 +517,7 @@ struct kvm_mmu_notifier_range {
on_lock_fn_t on_lock;
bool flush_on_ret;
bool may_block;
+ bool lockless;
};
/*
@@ -571,6 +572,10 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
IS_KVM_NULL_FN(range->handler)))
return r;
+ /* on_lock will never be called for lockless walks */
+ if (WARN_ON_ONCE(range->lockless && !IS_KVM_NULL_FN(range->on_lock)))
+ return r;
+
idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
@@ -602,15 +607,18 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
gfn_range.start = hva_to_gfn_memslot(hva_start, slot);
gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
gfn_range.slot = slot;
+ gfn_range.lockless = range->lockless;
if (!r.found_memslot) {
r.found_memslot = true;
- KVM_MMU_LOCK(kvm);
- if (!IS_KVM_NULL_FN(range->on_lock))
- range->on_lock(kvm);
-
- if (IS_KVM_NULL_FN(range->handler))
- goto mmu_unlock;
+ if (!range->lockless) {
+ KVM_MMU_LOCK(kvm);
+ if (!IS_KVM_NULL_FN(range->on_lock))
+ range->on_lock(kvm);
+
+ if (IS_KVM_NULL_FN(range->handler))
+ goto mmu_unlock;
+ }
}
r.ret |= range->handler(kvm, &gfn_range);
}
@@ -620,7 +628,7 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
kvm_flush_remote_tlbs(kvm);
mmu_unlock:
- if (r.found_memslot)
+ if (r.found_memslot && !range->lockless)
KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
@@ -797,6 +805,8 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
.flush_on_ret =
!IS_ENABLED(CONFIG_KVM_ELIDE_TLB_FLUSH_IF_YOUNG),
.may_block = false,
+ .lockless =
+ IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS),
};
trace_kvm_age_hva(start, end);
@@ -817,6 +827,8 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
.on_lock = (void *)kvm_null_fn,
.flush_on_ret = false,
.may_block = false,
+ .lockless =
+ IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS),
};
trace_kvm_age_hva(start, end);
@@ -849,6 +861,8 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
.on_lock = (void *)kvm_null_fn,
.flush_on_ret = false,
.may_block = false,
+ .lockless =
+ IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS),
};
trace_kvm_test_age_hva(address);
--
2.47.0.199.ga7371fff76-goog