From: Marc Zyngier <maz@kernel.org>
Rather than look up the hyp vCPU on every run hypercall at EL2,
introduce a per-CPU 'loaded_hyp_vcpu' tracking variable which is updated
by a pair of load/put hypercalls called directly from
kvm_arch_vcpu_{load,put}() when pKVM is enabled.
Tested-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
---
arch/arm64/include/asm/kvm_asm.h | 2 ++
arch/arm64/kvm/arm.c | 14 ++++++++
arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 7 ++++
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 47 ++++++++++++++++++++------
arch/arm64/kvm/hyp/nvhe/pkvm.c | 29 ++++++++++++++++
arch/arm64/kvm/vgic/vgic-v3.c | 6 ++--
6 files changed, 93 insertions(+), 12 deletions(-)
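
[Editorial note, not part of the patch: below is a minimal, standalone
userspace sketch of the per-CPU "loaded vCPU" tracking this patch
introduces. It is an assumption-laden model, not kernel code: the names
loosely mirror the patch, but the vm_table_lock, handle lookup, reference
counting and hypercall plumbing of the real EL2 code are omitted.]

	#include <stddef.h>
	#include <stdio.h>

	#define NR_CPUS  4
	#define NR_VCPUS 2

	struct hyp_vcpu {
		int idx;
		/* Backpointer to the per-CPU slot holding us; NULL if not loaded. */
		struct hyp_vcpu **loaded_slot;
	};

	/* One "currently loaded vCPU" slot per physical CPU. */
	static struct hyp_vcpu *loaded_hyp_vcpu[NR_CPUS];
	static struct hyp_vcpu vcpus[NR_VCPUS] = { { .idx = 0 }, { .idx = 1 } };

	/* Model of the load hypercall: pin the vCPU to this CPU's slot. */
	static struct hyp_vcpu *vcpu_load(int cpu, int vcpu_idx)
	{
		struct hyp_vcpu *v = &vcpus[vcpu_idx];

		if (loaded_hyp_vcpu[cpu])	/* must put the old vCPU first */
			return NULL;
		if (v->loaded_slot)		/* already loaded on another CPU */
			return NULL;

		v->loaded_slot = &loaded_hyp_vcpu[cpu];
		loaded_hyp_vcpu[cpu] = v;
		return v;
	}

	/* Model of the put hypercall: clear both directions of the link. */
	static void vcpu_put(int cpu)
	{
		struct hyp_vcpu *v = loaded_hyp_vcpu[cpu];

		if (!v)
			return;
		v->loaded_slot = NULL;
		loaded_hyp_vcpu[cpu] = NULL;
	}

	int main(void)
	{
		/* The run path can now simply read the per-CPU slot. */
		vcpu_load(0, 1);
		printf("cpu0 runs vcpu %d\n", loaded_hyp_vcpu[0]->idx);
		printf("double load on cpu1: %p\n", (void *)vcpu_load(1, 1));
		vcpu_put(0);
		return 0;
	}

[The second vcpu_load() call returns NULL, modelling the "loaded on more
than one cpu" guard that the real pkvm_load_hyp_vcpu() enforces under
vm_table_lock.]
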
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ca2590344313..89c0fac69551 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -79,6 +79,8 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a102c3aebdbc..55cc62b2f469 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -619,12 +619,26 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_arch_vcpu_load_debug_state_flags(vcpu);
+ if (is_protected_kvm_enabled()) {
+ kvm_call_hyp_nvhe(__pkvm_vcpu_load,
+ vcpu->kvm->arch.pkvm.handle,
+ vcpu->vcpu_idx, vcpu->arch.hcr_el2);
+ kvm_call_hyp(__vgic_v3_restore_vmcr_aprs,
+ &vcpu->arch.vgic_cpu.vgic_v3);
+ }
+
if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
vcpu_set_on_unsupported_cpu(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ if (is_protected_kvm_enabled()) {
+ kvm_call_hyp(__vgic_v3_save_vmcr_aprs,
+ &vcpu->arch.vgic_cpu.vgic_v3);
+ kvm_call_hyp_nvhe(__pkvm_vcpu_put);
+ }
+
kvm_arch_vcpu_put_debug_state_flags(vcpu);
kvm_arch_vcpu_put_fp(vcpu);
if (has_vhe())
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index f361d8b91930..be52c5b15e21 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -20,6 +20,12 @@ struct pkvm_hyp_vcpu {
/* Backpointer to the host's (untrusted) vCPU instance. */
struct kvm_vcpu *host_vcpu;
+
+ /*
+ * If this hyp vCPU is loaded, then this is a backpointer to the
+ * per-cpu pointer tracking us. Otherwise, NULL if not loaded.
+ */
+ struct pkvm_hyp_vcpu **loaded_hyp_vcpu;
};
/*
@@ -69,6 +75,7 @@ int __pkvm_teardown_vm(pkvm_handle_t handle);
struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
unsigned int vcpu_idx);
void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
+struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void);
struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle);
void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 6aa0b13d86e5..95d78db315b3 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -141,16 +141,46 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i];
}
+static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ DECLARE_REG(unsigned int, vcpu_idx, host_ctxt, 2);
+ DECLARE_REG(u64, hcr_el2, host_ctxt, 3);
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+
+ if (!is_protected_kvm_enabled())
+ return;
+
+ hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx);
+ if (!hyp_vcpu)
+ return;
+
+ if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
+ /* Propagate WFx trapping flags */
+ hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI);
+ hyp_vcpu->vcpu.arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI);
+ }
+}
+
+static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
+{
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+
+ if (!is_protected_kvm_enabled())
+ return;
+
+ hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+ if (hyp_vcpu)
+ pkvm_put_hyp_vcpu(hyp_vcpu);
+}
+
static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1);
int ret;
- host_vcpu = kern_hyp_va(host_vcpu);
-
if (unlikely(is_protected_kvm_enabled())) {
- struct pkvm_hyp_vcpu *hyp_vcpu;
- struct kvm *host_kvm;
+ struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
/*
* KVM (and pKVM) doesn't support SME guests for now, and
@@ -163,9 +193,6 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
goto out;
}
- host_kvm = kern_hyp_va(host_vcpu->kvm);
- hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
- host_vcpu->vcpu_idx);
if (!hyp_vcpu) {
ret = -EINVAL;
goto out;
@@ -176,12 +203,10 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
ret = __kvm_vcpu_run(&hyp_vcpu->vcpu);
sync_hyp_vcpu(hyp_vcpu);
- pkvm_put_hyp_vcpu(hyp_vcpu);
} else {
/* The host is fully trusted, run its vCPU directly. */
- ret = __kvm_vcpu_run(host_vcpu);
+ ret = __kvm_vcpu_run(kern_hyp_va(host_vcpu));
}
-
out:
cpu_reg(host_ctxt, 1) = ret;
}
@@ -409,6 +434,8 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
HANDLE_FUNC(__pkvm_teardown_vm),
+ HANDLE_FUNC(__pkvm_vcpu_load),
+ HANDLE_FUNC(__pkvm_vcpu_put),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index d46a02e24e4a..496d186efb03 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -23,6 +23,12 @@ unsigned int kvm_arm_vmid_bits;
unsigned int kvm_host_sve_max_vl;
+/*
+ * The currently loaded hyp vCPU for each physical CPU. Used only when
+ * protected KVM is enabled, but for both protected and non-protected VMs.
+ */
+static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu);
+
/*
* Set trap register values based on features in ID_AA64PFR0.
*/
@@ -306,15 +312,30 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
struct pkvm_hyp_vcpu *hyp_vcpu = NULL;
struct pkvm_hyp_vm *hyp_vm;
+ /* Cannot load a new vcpu without putting the old one first. */
+ if (__this_cpu_read(loaded_hyp_vcpu))
+ return NULL;
+
hyp_spin_lock(&vm_table_lock);
hyp_vm = get_vm_by_handle(handle);
if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx)
goto unlock;
hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
+
+ /* Ensure vcpu isn't loaded on more than one cpu simultaneously. */
+ if (unlikely(hyp_vcpu->loaded_hyp_vcpu)) {
+ hyp_vcpu = NULL;
+ goto unlock;
+ }
+
+ hyp_vcpu->loaded_hyp_vcpu = this_cpu_ptr(&loaded_hyp_vcpu);
hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
unlock:
hyp_spin_unlock(&vm_table_lock);
+
+ if (hyp_vcpu)
+ __this_cpu_write(loaded_hyp_vcpu, hyp_vcpu);
return hyp_vcpu;
}
@@ -323,10 +344,18 @@ void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
hyp_spin_lock(&vm_table_lock);
+ hyp_vcpu->loaded_hyp_vcpu = NULL;
+ __this_cpu_write(loaded_hyp_vcpu, NULL);
hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
hyp_spin_unlock(&vm_table_lock);
}
+struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void)
+{
+ return __this_cpu_read(loaded_hyp_vcpu);
+}
+
struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle)
{
struct pkvm_hyp_vm *hyp_vm;
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index f267bc2486a1..c2ef41fff079 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -734,7 +734,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
+ if (likely(!is_protected_kvm_enabled()))
+ kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
if (has_vhe())
__vgic_v3_activate_traps(cpu_if);
@@ -746,7 +747,8 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
+ if (likely(!is_protected_kvm_enabled()))
+ kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
WARN_ON(vgic_v4_put(vcpu));
if (has_vhe())
--
2.47.1.613.gc27f4b7a9f-goog
On Wed, Dec 18, 2024 at 07:40:50PM +0000, Quentin Perret wrote:
> From: Marc Zyngier <maz@kernel.org>
>
> Rather than look-up the hyp vCPU on every run hypercall at EL2,
> introduce a per-CPU 'loaded_hyp_vcpu' tracking variable which is updated
> by a pair of load/put hypercalls called directly from
> kvm_arch_vcpu_{load,put}() when pKVM is enabled.
Current mainline crashes when running the arch_timer_edge_cases selftest
in pKVM mode on a LibreTech Le Potato board, with a bisection pointing
at this change. The system dies like this:
# selftests: kvm: arch_timer_edge_cases
[ 130.599140] kvm [4518]: nVHE hyp panic at: [<ffff800081102b58>] __kvm_nvhe___vgic_v3_restore_vmcr_aprs+0x8/0x84!
[ 130.603685] kvm [4518]: Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE
[ 130.611962] kvm [4518]: Hyp Offset: 0xfffeca95ed000000
[ 130.617053] Kernel panic - not syncing: HYP panic:
Full log:
https://lava.sirena.org.uk/scheduler/job/1092083
Bisection log:
git bisect start
# status: waiting for both good and bad commits
# bad: [2014c95afecee3e76ca4a56956a936e23283f05b] Linux 6.14-rc1
git bisect bad 2014c95afecee3e76ca4a56956a936e23283f05b
# status: waiting for good commit(s), bad commit known
# good: [ffd294d346d185b70e28b1a28abe367bbfe53c04] Linux 6.13
git bisect good ffd294d346d185b70e28b1a28abe367bbfe53c04
# good: [6250ebe666e425e173df5e11e8a612d57921f48d] i2c: Fix core-managed per-client debugfs handling
git bisect good 6250ebe666e425e173df5e11e8a612d57921f48d
# good: [647d69605c70368d54fc012fce8a43e8e5955b04] Merge tag 'pci-v6.14-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci
git bisect good 647d69605c70368d54fc012fce8a43e8e5955b04
# good: [13845bdc869f136f92ad3d40ea09b867bb4ce467] Merge tag 'char-misc-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
git bisect good 13845bdc869f136f92ad3d40ea09b867bb4ce467
# bad: [9f10e7fb6a06bce4f81de5fd0f2f0390f99e89e4] Merge tag 'phy-for-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy
git bisect bad 9f10e7fb6a06bce4f81de5fd0f2f0390f99e89e4
# bad: [58f504efcda54a9079a38203acc088c3354aaa60] Merge tag 'tty-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
git bisect bad 58f504efcda54a9079a38203acc088c3354aaa60
# good: [a37eea94f775132866ecdd466fd88027d7125515] Merge tag 'sparc-for-6.14-tag1' of git://git.kernel.org/pub/scm/linux/kernel/git/alarsson/linux-sparc
git bisect good a37eea94f775132866ecdd466fd88027d7125515
# bad: [5e68d2eeac70978a06406c5b156815ceb00437f9] Merge branch kvm-arm64/pkvm-memshare-declutter into kvmarm-master/next
git bisect bad 5e68d2eeac70978a06406c5b156815ceb00437f9
# bad: [d0670128d42fa170bf8ba878cd23504c5c5cccc7] Merge branch kvm-arm64/pkvm-np-guest into kvmarm-master/next
git bisect bad d0670128d42fa170bf8ba878cd23504c5c5cccc7
# good: [c4a6ed85455979ef3fbadc2f1bdf18734b0ecea6] KVM: arm64: Promote guest ownership for DBGxVR/DBGxCR reads
git bisect good c4a6ed85455979ef3fbadc2f1bdf18734b0ecea6
# bad: [d0bd3e6570aee42766e7bd884734ae078667ea1e] KVM: arm64: Introduce __pkvm_host_share_guest()
git bisect bad d0bd3e6570aee42766e7bd884734ae078667ea1e
# good: [5398ddc5c90bd418b90d859e9267aa39399021af] KVM: arm64: Pass walk flags to kvm_pgtable_stage2_mkyoung
git bisect good 5398ddc5c90bd418b90d859e9267aa39399021af
# good: [c77e5181fed54b25d489eb7d2ccb5c1c72a1063c] KVM: arm64: Make kvm_pgtable_stage2_init() a static inline function
git bisect good c77e5181fed54b25d489eb7d2ccb5c1c72a1063c
# bad: [f7d03fcbf1f482069e9afac55b17de3bd323b8f6] KVM: arm64: Introduce __pkvm_vcpu_{load,put}()
git bisect bad f7d03fcbf1f482069e9afac55b17de3bd323b8f6
# good: [99996d575ee69d4327bad98a0148729b73dde23a] KVM: arm64: Add {get,put}_pkvm_hyp_vm() helpers
git bisect good 99996d575ee69d4327bad98a0148729b73dde23a
# first bad commit: [f7d03fcbf1f482069e9afac55b17de3bd323b8f6] KVM: arm64: Introduce __pkvm_vcpu_{load,put}()
On Mon, Feb 03, 2025 at 07:50:44PM +0000, Mark Brown wrote:
> On Wed, Dec 18, 2024 at 07:40:50PM +0000, Quentin Perret wrote:
> > From: Marc Zyngier <maz@kernel.org>
> >
> > Rather than look-up the hyp vCPU on every run hypercall at EL2,
> > introduce a per-CPU 'loaded_hyp_vcpu' tracking variable which is updated
> > by a pair of load/put hypercalls called directly from
> > kvm_arch_vcpu_{load,put}() when pKVM is enabled.
>
> Current mainline crashes when running the arch_timer_edge_cases selftest
> in pKVM mode on a LibreTech Le Potato board, with a bisection pointing
> at this change. The system dies like this:
>
> # selftests: kvm: arch_timer_edge_cases
> [ 130.599140] kvm [4518]: nVHE hyp panic at: [<ffff800081102b58>] __kvm_nvhe___vgic_v3_restore_vmcr_aprs+0x8/0x84!
> [ 130.603685] kvm [4518]: Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE
> [ 130.611962] kvm [4518]: Hyp Offset: 0xfffeca95ed000000
> [ 130.617053] Kernel panic - not syncing: HYP panic:
Thanks, fix posted here [*], although as a consequence this machine will
stop running KVM in protected mode (which was never meant to work in the
first place).
[*]: https://lore.kernel.org/kvmarm/20250203231543.233511-1-oliver.upton@linux.dev/
--
Thanks,
Oliver
On Mon, Feb 03, 2025 at 03:19:03PM -0800, Oliver Upton wrote:
> On Mon, Feb 03, 2025 at 07:50:44PM +0000, Mark Brown wrote:
> > Current mainline crashes when running the arch_timer_edge_cases selftest
> > in pKVM mode on a LibreTech Le Potato board, with a bisection pointing
> > at this change. The system dies like this:
> >
> > # selftests: kvm: arch_timer_edge_cases
> > [ 130.599140] kvm [4518]: nVHE hyp panic at: [<ffff800081102b58>] __kvm_nvhe___vgic_v3_restore_vmcr_aprs+0x8/0x84!
> Thanks, fix posted here [*], although as a consequence this machine will
> stop running KVM in protected mode (which was never meant to work in the
> first place).

Yeah, I wasn't clear if this was something that was intended to work or
not - it's all in a constant state of flux.
On Tuesday 04 Feb 2025 at 14:32:19 (+0000), Mark Brown wrote:
> On Mon, Feb 03, 2025 at 03:19:03PM -0800, Oliver Upton wrote:
> > On Mon, Feb 03, 2025 at 07:50:44PM +0000, Mark Brown wrote:
>
> > > Current mainline crashes when running the arch_timer_edge_cases selftest
> > > in pKVM mode on a LibreTech Le Potato board, with a bisection pointing
> > > at this change. The system dies like this:
> > >
> > > # selftests: kvm: arch_timer_edge_cases
> > > [ 130.599140] kvm [4518]: nVHE hyp panic at: [<ffff800081102b58>] __kvm_nvhe___vgic_v3_restore_vmcr_aprs+0x8/0x84!
>
> > Thanks, fix posted here [*], although as a consequence this machine will
> > stop running KVM in protected mode (which was never meant to work in the
> > first place).
>
> Yeah, I wasn't clear if this was something that was intended to work or
> not - it's all in a constant state of flux.
FWIW a770ee80e662 ("KVM: arm64: pkvm: Disable GICv2 support") did try to
explicitly disable things, but was clearly incomplete...
Thanks guys for the report and the quick fix!
Quentin