We have recently found a bug [1] in the pKVM memory ownership
transitions by code inspection, but it could have been caught with a
test.
Introduce a boot-time selftest exercising all the known pKVM memory
transitions and importantly checks the rejection of illegal transitions.
The new test is hidden behind a new Kconfig option separate from
CONFIG_EL2_NVHE_DEBUG on purpose as that has side effects on the
transition checks ([1] doesn't reproduce with EL2 debug enabled).
[1] https://lore.kernel.org/kvmarm/20241128154406.602875-1-qperret@google.com/
Suggested-by: Will Deacon <will@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
---
arch/arm64/kvm/Kconfig | 10 ++
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 6 +
arch/arm64/kvm/hyp/nvhe/mem_protect.c | 111 ++++++++++++++++++
arch/arm64/kvm/hyp/nvhe/setup.c | 2 +
4 files changed, 129 insertions(+)
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index ead632ad01b4..038d7f52232c 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -46,6 +46,7 @@ menuconfig KVM
config NVHE_EL2_DEBUG
bool "Debug mode for non-VHE EL2 object"
depends on KVM
+ select PKVM_SELFTESTS
help
Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
Failure reports will BUG() in the hypervisor. This is intended for
@@ -53,6 +54,15 @@ config NVHE_EL2_DEBUG
If unsure, say N.
+config PKVM_SELFTESTS
+ bool "Protected KVM hypervisor selftests"
+ help
+ Say Y here to enable Protected KVM (pKVM) hypervisor selftests
+ during boot. Failure reports will panic the hypervisor. This is
+ intended for EL2 hypervisor development.
+
+ If unsure, say N.
+
config PROTECTED_NVHE_STACKTRACE
bool "Protected KVM hypervisor stacktraces"
depends on NVHE_EL2_DEBUG
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 978f38c386ee..31a3f2cdf242 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -67,4 +67,10 @@ static __always_inline void __load_host_stage2(void)
else
write_sysreg(0, vttbr_el2);
}
+
+#ifdef CONFIG_PKVM_SELFTESTS
+void pkvm_ownership_selftest(void);
+#else
+static inline void pkvm_ownership_selftest(void) { }
+#endif
#endif /* __KVM_NVHE_MEM_PROTECT__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index ae39abc7e604..46f3f4aeecc5 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -1083,3 +1083,114 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
return 0;
}
+
+#ifdef CONFIG_PKVM_SELFTESTS
+struct pkvm_expected_state {
+ enum pkvm_page_state host;
+ enum pkvm_page_state hyp;
+};
+
+static struct pkvm_expected_state selftest_state;
+static struct hyp_page *selftest_page;
+
+static void assert_page_state(void)
+{
+ void *virt = hyp_page_to_virt(selftest_page);
+ u64 size = PAGE_SIZE << selftest_page->order;
+ u64 phys = hyp_virt_to_phys(virt);
+
+ host_lock_component();
+ WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host));
+ host_unlock_component();
+
+ hyp_lock_component();
+ WARN_ON(__hyp_check_page_state_range((u64)virt, size, selftest_state.hyp));
+ hyp_unlock_component();
+}
+
+#define assert_transition_res(res, fn, ...) \
+ do { \
+ WARN_ON(fn(__VA_ARGS__) != res); \
+ assert_page_state(); \
+ } while (0)
+
+void pkvm_ownership_selftest(void)
+{
+ void *virt = hyp_alloc_pages(&host_s2_pool, 0);
+ u64 phys, size, pfn;
+
+ WARN_ON(!virt);
+ selftest_page = hyp_virt_to_page(virt);
+ selftest_page->refcount = 0;
+
+ size = PAGE_SIZE << selftest_page->order;
+ phys = hyp_virt_to_phys(virt);
+ pfn = hyp_phys_to_pfn(phys);
+
+ selftest_state.host = PKVM_NOPAGE;
+ selftest_state.hyp = PKVM_PAGE_OWNED;
+ assert_page_state();
+ assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
+ assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
+
+ selftest_state.host = PKVM_PAGE_OWNED;
+ selftest_state.hyp = PKVM_NOPAGE;
+ assert_transition_res(0, __pkvm_hyp_donate_host, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
+ assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
+
+ selftest_state.host = PKVM_PAGE_SHARED_OWNED;
+ selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED;
+ assert_transition_res(0, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
+
+ assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
+ assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
+ hyp_unpin_shared_mem(virt, virt + size);
+ WARN_ON(hyp_page_count(virt) != 1);
+ assert_transition_res(-EBUSY, __pkvm_host_unshare_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
+
+ hyp_unpin_shared_mem(virt, virt + size);
+ assert_page_state();
+ WARN_ON(hyp_page_count(virt));
+
+ selftest_state.host = PKVM_PAGE_OWNED;
+ selftest_state.hyp = PKVM_NOPAGE;
+ assert_transition_res(0, __pkvm_host_unshare_hyp, pfn);
+
+ selftest_state.host = PKVM_PAGE_SHARED_OWNED;
+ selftest_state.hyp = PKVM_NOPAGE;
+ assert_transition_res(0, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
+ assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
+ assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
+
+ selftest_state.host = PKVM_PAGE_OWNED;
+ selftest_state.hyp = PKVM_NOPAGE;
+ assert_transition_res(0, __pkvm_host_unshare_ffa, pfn, 1);
+ assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
+
+ selftest_state.host = PKVM_NOPAGE;
+ selftest_state.hyp = PKVM_PAGE_OWNED;
+ assert_transition_res(0, __pkvm_host_donate_hyp, pfn, 1);
+
+ selftest_page->refcount = 1;
+ hyp_put_page(&host_s2_pool, virt);
+}
+#endif
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 46d9bd04348f..54006f959e1b 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -308,6 +308,8 @@ void __noreturn __pkvm_init_finalise(void)
goto out;
pkvm_hyp_vm_table_init(vm_table_base);
+
+ pkvm_ownership_selftest();
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
--
2.48.1.658.g4767266eb4-goog
On Tue, 25 Feb 2025 01:53:26 +0000, Quentin Perret <qperret@google.com> wrote: > > We have recently found a bug [1] in the pKVM memory ownership > transitions by code inspection, but it could have been caught with a > test. > > Introduce a boot-time selftest exercising all the known pKVM memory > transitions and importantly checks the rejection of illegal transitions. > > The new test is hidden behind a new Kconfig option separate from > CONFIG_EL2_NVHE_DEBUG on purpose as that has side effects on the > transition checks ([1] doesn't reproduce with EL2 debug enabled). That's a bit annoying, isn't it? Without EL2_DEBUG selected, you won't get any stacktrace, and the WARN_ON()s are a guaranteed panic. Yes, this is better than nothing, but I'm a bit worried this is going to be hard to use. Is there a way to reduce the impact the EL2 debug has on the rest of the code? It feels like it is more invasive than it should be... Thanks, M. -- Without deviation from the norm, progress is not possible.
On Wednesday 26 Feb 2025 at 14:32:52 (+0000), Marc Zyngier wrote: > On Tue, 25 Feb 2025 01:53:26 +0000, > Quentin Perret <qperret@google.com> wrote: > > > > We have recently found a bug [1] in the pKVM memory ownership > > transitions by code inspection, but it could have been caught with a > > test. > > > > Introduce a boot-time selftest exercising all the known pKVM memory > > transitions and importantly checks the rejection of illegal transitions. > > > > The new test is hidden behind a new Kconfig option separate from > > CONFIG_EL2_NVHE_DEBUG on purpose as that has side effects on the > > transition checks ([1] doesn't reproduce with EL2 debug enabled). > > That's a bit annoying, isn't it? Without EL2_DEBUG selected, you won't > get any stacktrace, and the WARN_ON()s are a guaranteed panic. Yes, > this is better than nothing, but I'm a bit worried this is going to be > hard to use. Right, so you _can_ enable EL2_DEBUG on top of the selftest stuff, and if you're not hitting one of those hard-to-find bugs described in the commit message above, then you're golden. In practice I suspect that if enabling the selftest alone leads to a panic, the next logical step is to enable EL2_DEBUG and see what you get. If enabling EL2_DEBUG makes the issue go away, then that'll require digging a bit deeper, but that should be pretty rare I presume. > Is there a way to reduce the impact the EL2 debug has on the rest of > the code? It feels like it is more invasive than it should be... Turns out I have a WiP series that moves the hypervisor ownership state to the hyp_vmemmap, similar to what we did for the host ownership. A nice property of that is that hyp state lookups become really cheap, no page-table walks required. So we could probably afford to drop the EL2_DEBUG ifdefery in host_share_hyp() and friends, and just unconditionally cross-check the hyp state on all transitions where it is involved. And with that we should probably just fold the pkvm selftest under EL2_DEBUG and call it a day. Would that work? Thanks, Quentin
© 2016 - 2026 Red Hat, Inc.