Add a test that verifies that KVM correctly injects a #GP for nested
VMRUN and a shutdown for nested #VMEXIT, if the GPA of vmcb12 cannot be
mapped.
Signed-off-by: Yosry Ahmed <yosry@kernel.org>
---
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../kvm/x86/svm_nested_invalid_vmcb12_gpa.c | 98 +++++++++++++++++++
2 files changed, 99 insertions(+)
create mode 100644 tools/testing/selftests/kvm/x86/svm_nested_invalid_vmcb12_gpa.c
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 36b48e766e499..f12e7c17d379d 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -110,6 +110,7 @@ TEST_GEN_PROGS_x86 += x86/state_test
TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test
TEST_GEN_PROGS_x86 += x86/svm_vmcall_test
TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_invalid_vmcb12_gpa
TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test
TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test
TEST_GEN_PROGS_x86 += x86/svm_lbr_nested_state
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_invalid_vmcb12_gpa.c b/tools/testing/selftests/kvm/x86/svm_nested_invalid_vmcb12_gpa.c
new file mode 100644
index 0000000000000..c6d5f712120d1
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_nested_invalid_vmcb12_gpa.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+#define SYNC_GP 101
+#define SYNC_L2_STARTED 102
+
+u64 valid_vmcb12_gpa;
+int gp_triggered;
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_ASSERT(!gp_triggered);
+ GUEST_SYNC(SYNC_GP);
+ gp_triggered = 1;
+ regs->rax = valid_vmcb12_gpa;
+}
+
+static void l2_guest_code(void)
+{
+ GUEST_SYNC(SYNC_L2_STARTED);
+ vmcall();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, u64 invalid_vmcb12_gpa)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ valid_vmcb12_gpa = svm->vmcb_gpa;
+
+ run_guest(svm->vmcb, invalid_vmcb12_gpa); /* #GP */
+
+ /* GP handler should jump here */
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_x86_state *state;
+ vm_vaddr_t nested_gva = 0;
+ struct kvm_vcpu *vcpu;
+ uint32_t maxphyaddr;
+ u64 max_legal_gpa;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+
+ /*
+ * Find the max legal GPA that is not backed by a memslot (i.e. cannot
+ * be mapped by KVM).
+ */
+ maxphyaddr = kvm_cpuid_property(vcpu->cpuid, X86_PROPERTY_MAX_PHY_ADDR);
+ max_legal_gpa = BIT_ULL(maxphyaddr) - PAGE_SIZE;
+ vcpu_alloc_svm(vm, &nested_gva);
+ vcpu_args_set(vcpu, 2, nested_gva, max_legal_gpa);
+
+ /* VMRUN with max_legal_gpa, KVM injects a #GP */
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], SYNC_GP);
+
+ /*
+ * Enter L2 (with a legit vmcb12 GPA), then overwrite vmcb12 GPA with
+ * max_legal_gpa. KVM will fail to map vmcb12 on nested VM-Exit and
+ * cause a shutdown.
+ */
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], SYNC_L2_STARTED);
+
+ state = vcpu_save_state(vcpu);
+ state->nested.hdr.svm.vmcb_pa = max_legal_gpa;
+ vcpu_load_state(vcpu, state);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_x86_state_cleanup(state);
+ kvm_vm_free(vm);
+ return 0;
+}
--
2.53.0.473.g4a7958ca14-goog
On Mon, Mar 2, 2026 at 4:43 PM Yosry Ahmed <yosry@kernel.org> wrote: > > Add a test that verifies that KVM correctly injects a #GP for nested > VMRUN and a shutdown for nested #VMEXIT, if the GPA of vmcb12 cannot be > mapped. > > Signed-off-by: Yosry Ahmed <yosry@kernel.org> > ... > + /* > + * Find the max legal GPA that is not backed by a memslot (i.e. cannot > + * be mapped by KVM). > + */ > + maxphyaddr = kvm_cpuid_property(vcpu->cpuid, X86_PROPERTY_MAX_PHY_ADDR); > + max_legal_gpa = BIT_ULL(maxphyaddr) - PAGE_SIZE; > + vcpu_alloc_svm(vm, &nested_gva); > + vcpu_args_set(vcpu, 2, nested_gva, max_legal_gpa); > + > + /* VMRUN with max_legal_gpa, KVM injects a #GP */ > + vcpu_run(vcpu); > + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); > + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); > + TEST_ASSERT_EQ(uc.args[1], SYNC_GP); Why would this raise #GP? That isn't architected behavior.
On Thu, Mar 5, 2026 at 2:30 PM Jim Mattson <jmattson@google.com> wrote: > > On Mon, Mar 2, 2026 at 4:43 PM Yosry Ahmed <yosry@kernel.org> wrote: > > > > Add a test that verifies that KVM correctly injects a #GP for nested > > VMRUN and a shutdown for nested #VMEXIT, if the GPA of vmcb12 cannot be > > mapped. > > > > Signed-off-by: Yosry Ahmed <yosry@kernel.org> > > ... > > + /* > > + * Find the max legal GPA that is not backed by a memslot (i.e. cannot > > + * be mapped by KVM). > > + */ > > + maxphyaddr = kvm_cpuid_property(vcpu->cpuid, X86_PROPERTY_MAX_PHY_ADDR); > > + max_legal_gpa = BIT_ULL(maxphyaddr) - PAGE_SIZE; > > + vcpu_alloc_svm(vm, &nested_gva); > > + vcpu_args_set(vcpu, 2, nested_gva, max_legal_gpa); > > + > > + /* VMRUN with max_legal_gpa, KVM injects a #GP */ > > + vcpu_run(vcpu); > > + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); > > + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); > > + TEST_ASSERT_EQ(uc.args[1], SYNC_GP); > > Why would this raise #GP? That isn't architected behavior. I don't see architected behavior in the APM for what happens if VMRUN fails to load the VMCB from memory. I guess it should be the same as what would happen if a PTE is pointing to a physical address that doesn't exist? Maybe #MC?
On Thu, Mar 5, 2026 at 2:52 PM Yosry Ahmed <yosry@kernel.org> wrote: > > On Thu, Mar 5, 2026 at 2:30 PM Jim Mattson <jmattson@google.com> wrote: > > > > On Mon, Mar 2, 2026 at 4:43 PM Yosry Ahmed <yosry@kernel.org> wrote: > > > > > > Add a test that verifies that KVM correctly injects a #GP for nested > > > VMRUN and a shutdown for nested #VMEXIT, if the GPA of vmcb12 cannot be > > > mapped. > > > > > > Signed-off-by: Yosry Ahmed <yosry@kernel.org> > > > ... > > > + /* > > > + * Find the max legal GPA that is not backed by a memslot (i.e. cannot > > > + * be mapped by KVM). > > > + */ > > > + maxphyaddr = kvm_cpuid_property(vcpu->cpuid, X86_PROPERTY_MAX_PHY_ADDR); > > > + max_legal_gpa = BIT_ULL(maxphyaddr) - PAGE_SIZE; > > > + vcpu_alloc_svm(vm, &nested_gva); > > > + vcpu_args_set(vcpu, 2, nested_gva, max_legal_gpa); > > > + > > > + /* VMRUN with max_legal_gpa, KVM injects a #GP */ > > > + vcpu_run(vcpu); > > > + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); > > > + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); > > > + TEST_ASSERT_EQ(uc.args[1], SYNC_GP); > > > > Why would this raise #GP? That isn't architected behavior. > > I don't see architected behavior in the APM for what happens if VMRUN > fails to load the VMCB from memory. I guess it should be the same as > what would happen if a PTE is pointing to a physical address that > doesn't exist? Maybe #MC? Reads from non-existent memory return all 1's, so I would expect a #VMEXIT with exitcode VMEXIT_INVALID.
On Thu, Mar 5, 2026 at 4:05 PM Jim Mattson <jmattson@google.com> wrote: > > On Thu, Mar 5, 2026 at 2:52 PM Yosry Ahmed <yosry@kernel.org> wrote: > > > > On Thu, Mar 5, 2026 at 2:30 PM Jim Mattson <jmattson@google.com> wrote: > > > > > > On Mon, Mar 2, 2026 at 4:43 PM Yosry Ahmed <yosry@kernel.org> wrote: > > > > > > > > Add a test that verifies that KVM correctly injects a #GP for nested > > > > VMRUN and a shutdown for nested #VMEXIT, if the GPA of vmcb12 cannot be > > > > mapped. > > > > > > > > Signed-off-by: Yosry Ahmed <yosry@kernel.org> > > > > ... > > > > + /* > > > > + * Find the max legal GPA that is not backed by a memslot (i.e. cannot > > > > + * be mapped by KVM). > > > > + */ > > > > + maxphyaddr = kvm_cpuid_property(vcpu->cpuid, X86_PROPERTY_MAX_PHY_ADDR); > > > > + max_legal_gpa = BIT_ULL(maxphyaddr) - PAGE_SIZE; > > > > + vcpu_alloc_svm(vm, &nested_gva); > > > > + vcpu_args_set(vcpu, 2, nested_gva, max_legal_gpa); > > > > + > > > > + /* VMRUN with max_legal_gpa, KVM injects a #GP */ > > > > + vcpu_run(vcpu); > > > > + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); > > > > + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); > > > > + TEST_ASSERT_EQ(uc.args[1], SYNC_GP); > > > > > > Why would this raise #GP? That isn't architected behavior. > > > > I don't see architected behavior in the APM for what happens if VMRUN > > fails to load the VMCB from memory. I guess it should be the same as > > what would happen if a PTE is pointing to a physical address that > > doesn't exist? Maybe #MC? > > Reads from non-existent memory return all 1's Today I learned :) Do all x86 CPUs do this? > so I would expect a #VMEXIT with exitcode VMEXIT_INVALID. This would actually simplify the logic, as it would be the same failure mode as failed consistency checks. That being said, KVM has been injecting a #GP when it fails to map vmcb12 since the beginning. It also does the same thing for VMSAVE/VMLOAD, which seems to also not be architectural. 
This would be more annoying to handle correctly because we'll need to copy all 1's to the relevant fields in vmcb12 or vmcb01. Sean, what do you want us to do here?
On Thu, Mar 5, 2026 at 4:40 PM Yosry Ahmed <yosry@kernel.org> wrote: > > On Thu, Mar 5, 2026 at 4:05 PM Jim Mattson <jmattson@google.com> wrote: > > > > On Thu, Mar 5, 2026 at 2:52 PM Yosry Ahmed <yosry@kernel.org> wrote: > > > > > > On Thu, Mar 5, 2026 at 2:30 PM Jim Mattson <jmattson@google.com> wrote: > > > > > > > > On Mon, Mar 2, 2026 at 4:43 PM Yosry Ahmed <yosry@kernel.org> wrote: > > > > > > > > > > Add a test that verifies that KVM correctly injects a #GP for nested > > > > > VMRUN and a shutdown for nested #VMEXIT, if the GPA of vmcb12 cannot be > > > > > mapped. > > > > > > > > > > Signed-off-by: Yosry Ahmed <yosry@kernel.org> > > > > > ... > > > > > + /* > > > > > + * Find the max legal GPA that is not backed by a memslot (i.e. cannot > > > > > + * be mapped by KVM). > > > > > + */ > > > > > + maxphyaddr = kvm_cpuid_property(vcpu->cpuid, X86_PROPERTY_MAX_PHY_ADDR); > > > > > + max_legal_gpa = BIT_ULL(maxphyaddr) - PAGE_SIZE; > > > > > + vcpu_alloc_svm(vm, &nested_gva); > > > > > + vcpu_args_set(vcpu, 2, nested_gva, max_legal_gpa); > > > > > + > > > > > + /* VMRUN with max_legal_gpa, KVM injects a #GP */ > > > > > + vcpu_run(vcpu); > > > > > + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); > > > > > + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); > > > > > + TEST_ASSERT_EQ(uc.args[1], SYNC_GP); > > > > > > > > Why would this raise #GP? That isn't architected behavior. > > > > > > I don't see architected behavior in the APM for what happens if VMRUN > > > fails to load the VMCB from memory. I guess it should be the same as > > > what would happen if a PTE is pointing to a physical address that > > > doesn't exist? Maybe #MC? > > > > Reads from non-existent memory return all 1's > > Today I learned :) Do all x86 CPUs do this? Yes. If no device claims the address, reads return all 1s. I think you can thank pull-up resistors for that. > > so I would expect a #VMEXIT with exitcode VMEXIT_INVALID. 
> > This would actually simplify the logic, as it would be the same > failure mode as failed consistency checks. That being said, KVM has > been injecting a #GP when it fails to map vmcb12 since the beginning. KVM has never been known for its attention to detail. > It also does the same thing for VMSAVE/VMLOAD, which seems to also not > be architectural. This would be more annoying to handle correctly > because we'll need to copy all 1's to the relevant fields in vmcb12 or > vmcb01. Or just exit to userspace with KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. I think on the VMX side, this sort of thing goes through kvm_handle_memory_failure().
On Thu, Mar 05, 2026, Jim Mattson wrote:
> On Thu, Mar 5, 2026 at 4:40 PM Yosry Ahmed <yosry@kernel.org> wrote:
> >
> > On Thu, Mar 5, 2026 at 4:05 PM Jim Mattson <jmattson@google.com> wrote:
> > >
> > > On Thu, Mar 5, 2026 at 2:52 PM Yosry Ahmed <yosry@kernel.org> wrote:
> > > >
> > > > On Thu, Mar 5, 2026 at 2:30 PM Jim Mattson <jmattson@google.com> wrote:
> > > > >
> > > > > On Mon, Mar 2, 2026 at 4:43 PM Yosry Ahmed <yosry@kernel.org> wrote:
> > > > > >
> > > > > > Add a test that verifies that KVM correctly injects a #GP for nested
> > > > > > VMRUN and a shutdown for nested #VMEXIT, if the GPA of vmcb12 cannot be
> > > > > > mapped.
> > > > > >
> > > > > > Signed-off-by: Yosry Ahmed <yosry@kernel.org>
> > > > > > ...
> > > > > > + /*
> > > > > > + * Find the max legal GPA that is not backed by a memslot (i.e. cannot
> > > > > > + * be mapped by KVM).
> > > > > > + */
> > > > > > + maxphyaddr = kvm_cpuid_property(vcpu->cpuid, X86_PROPERTY_MAX_PHY_ADDR);
> > > > > > + max_legal_gpa = BIT_ULL(maxphyaddr) - PAGE_SIZE;
> > > > > > + vcpu_alloc_svm(vm, &nested_gva);
> > > > > > + vcpu_args_set(vcpu, 2, nested_gva, max_legal_gpa);
> > > > > > +
> > > > > > + /* VMRUN with max_legal_gpa, KVM injects a #GP */
> > > > > > + vcpu_run(vcpu);
> > > > > > + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
> > > > > > + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
> > > > > > + TEST_ASSERT_EQ(uc.args[1], SYNC_GP);
> > > > >
> > > > > Why would this raise #GP? That isn't architected behavior.
> > > >
> > > > I don't see architected behavior in the APM for what happens if VMRUN
> > > > fails to load the VMCB from memory. I guess it should be the same as
> > > > what would happen if a PTE is pointing to a physical address that
> > > > doesn't exist? Maybe #MC?
> > >
> > > Reads from non-existent memory return all 1's
> >
> > Today I learned :) Do all x86 CPUs do this?
>
> Yes. If no device claims the address, reads return all 1s. I think you
> can thank pull-up resistors for that.
Ya, it's officially documented PCI behavior. Writes are dropped, reads return
all 1s.
> > > so I would expect a #VMEXIT with exitcode VMEXIT_INVALID.
> >
> > This would actually simplify the logic, as it would be the same
> > failure mode as failed consistency checks. That being said, KVM has
> > been injecting a #GP when it fails to map vmcb12 since the beginning.
>
> KVM has never been known for its attention to detail.
LOL, hey, we try. Sometimes we just forget things though :-)
7a35e515a705 ("KVM: VMX: Properly handle kvm_read/write_guest_virt*() result")
> > It also does the same thing for VMSAVE/VMLOAD, which seems to also not
> > be architectural. This would be more annoying to handle correctly
> > because we'll need to copy all 1's to the relevant fields in vmcb12 or
> > vmcb01.
>
> Or just exit to userspace with
> KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. I think on the
> VMX side, this sort of thing goes through kvm_handle_memory_failure().
Yep, I think this is the correct fixup:
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index b191c6cab57d..78a542c6ddf1 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1105,10 +1105,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
vmcb12_gpa = svm->vmcb->save.rax;
err = nested_svm_copy_vmcb12_to_cache(vcpu, vmcb12_gpa);
- if (err == -EFAULT) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
+ if (err == -EFAULT)
+ return kvm_handle_memory_failure(vcpu, X86EMUL_UNHANDLEABLE, NULL);
/*
* Advance RIP if #GP or #UD are not injected, but otherwise stop if
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index b191c6cab57d..78a542c6ddf1 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -1105,10 +1105,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
>
> vmcb12_gpa = svm->vmcb->save.rax;
> err = nested_svm_copy_vmcb12_to_cache(vcpu, vmcb12_gpa);
> - if (err == -EFAULT) {
> - kvm_inject_gp(vcpu, 0);
> - return 1;
> - }
> + if (err == -EFAULT)
> + return kvm_handle_memory_failure(vcpu, X86EMUL_UNHANDLEABLE, NULL);
Why not call kvm_prepare_emulation_failure_exit() directly? Is the
premise that kvm_handle_memory_failure() might evolve to do more
things for emulation failures that are specifically caused by memory
failures, other than potentially injecting an exception?
On Fri, Mar 06, 2026, Yosry Ahmed wrote:
> > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> > index b191c6cab57d..78a542c6ddf1 100644
> > --- a/arch/x86/kvm/svm/nested.c
> > +++ b/arch/x86/kvm/svm/nested.c
> > @@ -1105,10 +1105,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> >
> > vmcb12_gpa = svm->vmcb->save.rax;
> > err = nested_svm_copy_vmcb12_to_cache(vcpu, vmcb12_gpa);
> > - if (err == -EFAULT) {
> > - kvm_inject_gp(vcpu, 0);
> > - return 1;
> > - }
> > + if (err == -EFAULT)
> > + return kvm_handle_memory_failure(vcpu, X86EMUL_UNHANDLEABLE, NULL);
>
> Why not call kvm_prepare_emulation_failure_exit() directly?
Mostly because my mental coin-flip came up heads. But it's also one less line
of code, woot woot!
> Is the premise that kvm_handle_memory_failure() might evolve to do more
> things for emulation failures that are specifically caused by memory
> failures, other than potentially injecting an exception?
Yeah, more or less. I doubt kvm_handle_memory_failure() will ever actually evolve
into anything more sophisticated, but at the very least, using
kvm_handle_memory_failure() documents _why_ KVM can't handle emulation.
On second thought, I think using X86EMUL_IO_NEEDED would be more appropriate.
The memremap() is only reachable if allow_unsafe_mappings is enabled, and so for
a "default" configuration, failure can only occur on:
if (is_error_noslot_pfn(map->pfn))
return -EINVAL;
Which doesn't _guarantee_ that emulated I/O is required, but we're definitely
beyond splitting hairs at that point.
On Fri, Mar 6, 2026 at 8:35 AM Sean Christopherson <seanjc@google.com> wrote:
>
> On Fri, Mar 06, 2026, Yosry Ahmed wrote:
> > > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> > > index b191c6cab57d..78a542c6ddf1 100644
> > > --- a/arch/x86/kvm/svm/nested.c
> > > +++ b/arch/x86/kvm/svm/nested.c
> > > @@ -1105,10 +1105,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> > >
> > > vmcb12_gpa = svm->vmcb->save.rax;
> > > err = nested_svm_copy_vmcb12_to_cache(vcpu, vmcb12_gpa);
> > > - if (err == -EFAULT) {
> > > - kvm_inject_gp(vcpu, 0);
> > > - return 1;
> > > - }
> > > + if (err == -EFAULT)
> > > + return kvm_handle_memory_failure(vcpu, X86EMUL_UNHANDLEABLE, NULL);
> >
> > Why not call kvm_prepare_emulation_failure_exit() directly?
>
> Mostly because my mental coin-flip came up heads. But it's also one less line
> of code, woot woot!
>
> > Is the premise that kvm_handle_memory_failure() might evolve to do more
> > things for emulation failures that are specifically caused by memory
> > failures, other than potentially injecting an exception?
>
> Yeah, more or less. I doubt kvm_handle_memory_failure() will ever actually evolve
> into anything more sophisticated, but at the very least, using
> kvm_handle_memory_failure() documents _why_ KVM can't handle emulation.
Yeah I agree with this too.
>
> On second thought, I think using X86EMUL_IO_NEEDED would be more appropriate.
> The memremap() is only reachable if allow_unsafe_mappings is enabled, and so for
> a "default" configuration, failure can only occur on:
>
> if (is_error_noslot_pfn(map->pfn))
> return -EINVAL;
>
> Which doesn't _guarantee_ that emulated I/O is required, but we're definitely
> beyond splitting hairs at that point.
> > > It also does the same thing for VMSAVE/VMLOAD, which seems to also not > > > be architectural. This would be more annoying to handle correctly > > > because we'll need to copy all 1's to the relevant fields in vmcb12 or > > > vmcb01. > > > > Or just exit to userspace with > > KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. I think on the > > VMX side, this sort of thing goes through kvm_handle_memory_failure(). > > Yep, I think this is the correct fixup: Looks good, I was going to say ignore the series @ https://lore.kernel.org/kvm/20260305203005.1021335-1-yosry@kernel.org/, because I will incorporate the fix in it after patch 1 (the cleanup), and patch 2 will need to be redone such that the test checks for KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. But then I stumbled upon the VMSAVE/VMLOAD behavior and the #GP I was observing with vls=1 (see cover letter). So I dug a bit deeper. Turns out with vls=1, if the GPA is supported but not mapped, VMLOAD will generate a #NPF, and because there is no slot KVM will install an MMIO SPTE and emulate the instruction. The emulator will end up calling check_svme_pa() -> emulate_gp(). I didn't catch this initially because I was tracing kvm_queue_exception_e() and didn't get any hits, but I can see the call to inject_emulated_exception() with #GP so probably the compiler just inlines it. Anyway, we'll also need to update the emulator. Perhaps just return X86EMUL_UNHANDLEABLE from check_svme_pa() instead of injecting #GP, although I don't think this will always end up returning to userspace with KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. Looking at handle_emulation_failure(), we'll only immediately exit to userspace if KVM_CAP_EXIT_ON_EMULATION_FAILURE is set (because EMULTYPE_SKIP won't be set). Otherwise we'll inject a #UD, and only exit to userspace if the VMSAVE/VMLOAD came from L1. Not sure if that's good enough or if we need to augment the emulator somehow (e.g. 
new return value that always exits to userspace? Or allow x86_emulate_insn() -> check_perm() to change the emulation type to add EMULTYPE_SKIP?
On Fri, Mar 6, 2026 at 7:52 AM Yosry Ahmed <yosry@kernel.org> wrote: > > > > > It also does the same thing for VMSAVE/VMLOAD, which seems to also not > > > > be architectural. This would be more annoying to handle correctly > > > > because we'll need to copy all 1's to the relevant fields in vmcb12 or > > > > vmcb01. > > > > > > Or just exit to userspace with > > > KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. I think on the > > > VMX side, this sort of thing goes through kvm_handle_memory_failure(). > > > > Yep, I think this is the correct fixup: > > Looks good, I was going to say ignore the series @ > https://lore.kernel.org/kvm/20260305203005.1021335-1-yosry@kernel.org/, > because I will incorporate the fix in it after patch 1 (the cleanup), > and patch 2 will need to be redone such that the test checks for > KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. But then I > stumbled upon the VMSAVE/VMLOAD behavior and the #GP I was observing > with vls=1 (see cover letter). > > So I dug a bit deeper. Turns out with vls=1, if the GPA is supported > but not mapped, VMLOAD will generate a #NPF, and because there is no > slot KVM will install an MMIO SPTE and emulate the instruction. The > emulator will end up calling check_svme_pa() -> emulate_gp(). I didn't > catch this initially because I was tracing kvm_queue_exception_e() and > didn't get any hits, but I can see the call to > inject_emulated_exception() with #GP so probably the compiler just > inlines it. > > Anyway, we'll also need to update the emulator. Perhaps just return > X86EMUL_UNHANDLEABLE from check_svme_pa() instead of injecting #GP, Actually, not quite. check_svme_pa() should keep injecting #GP, but based on checking rax against kvm_host.maxphyaddr instead of the hardcoded 0xffff000000000000ULL value. The address my test is using is 0xffffffffff000, which is a legal address on Turin (52 bit phyaddr), but check_svme_pa() thinks it isn't and injects #GP. 
I think if that is fixed, check_svme_pa() will succeed, and then emulation will fail anyway because it's not implemented. So that seems like a separate bug. But then if the address is below maxphyaddr and the EFER.SVME check succeeds, I think we should return X86EMUL_UNHANDLEABLE? I cannot immediately tell if this will organically happen in x86_emulate_insn() after check_svme_pa() returns. The rest of what I said stands. > although I don't think this will always end up returning to userspace > with > KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. Looking at > handle_emulation_failure(), we'll only immediately exit to userspace > if KVM_CAP_EXIT_ON_EMULATION_FAILURE is set (because EMULTYPE_SKIP > won't be set). Otherwise we'll inject a #UD, and only exit to > userspace if the VMSAVE/VMLOAD came from L1. > > Not sure if that's good enough or if we need to augment the emulator > somehow (e.g. new return value that always exits to userspace? Or > allow EMULTYPE_SKIP x86_emulate_insn() -> check_perm() to change the > emulation type to add EMULTYPE_SKIP? On Fri, Mar 6, 2026 at 7:52 AM Yosry Ahmed <yosry@kernel.org> wrote: > > > > > It also does the same thing for VMSAVE/VMLOAD, which seems to also not > > > > be architectural. This would be more annoying to handle correctly > > > > because we'll need to copy all 1's to the relevant fields in vmcb12 or > > > > vmcb01. > > > > > > Or just exit to userspace with > > > KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. I think on the > > > VMX side, this sort of thing goes through kvm_handle_memory_failure(). > > > > Yep, I think this is the correct fixup: > > Looks good, I was going to say ignore the series @ > https://lore.kernel.org/kvm/20260305203005.1021335-1-yosry@kernel.org/, > because I will incorporate the fix in it after patch 1 (the cleanup), > and patch 2 will need to be redone such that the test checks for > KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. 
But then I > stumbled upon the VMSAVE/VMLOAD behavior and the #GP I was observing > with vls=1 (see cover letter). > > So I dug a bit deeper. Turns out with vls=1, if the GPA is supported > but not mapped, VMLOAD will generate a #NPF, and because there is no > slot KVM will install an MMIO SPTE and emulate the instruction. The > emulator will end up calling check_svme_pa() -> emulate_gp(). I didn't > catch this initially because I was tracing kvm_queue_exception_e() and > didn't get any hits, but I can see the call to > inject_emulated_exception() with #GP so probably the compiler just > inlines it. > > Anyway, we'll also need to update the emulator. Perhaps just return > X86EMUL_UNHANDLEABLE from check_svme_pa() instead of injecting #GP, > although I don't think this will always end up returning to userspace > with > KVM_EXIT_INTERNAL_ERROR/KVM_INTERNAL_ERROR_EMULATION. Looking at > handle_emulation_failure(), we'll only immediately exit to userspace > if KVM_CAP_EXIT_ON_EMULATION_FAILURE is set (because EMULTYPE_SKIP > won't be set). Otherwise we'll inject a #UD, and only exit to > userspace if the VMSAVE/VMLOAD came from L1. > > Not sure if that's good enough or if we need to augment the emulator > somehow (e.g. new return value that always exits to userspace? Or > allow EMULTYPE_SKIP x86_emulate_insn() -> check_perm() to change the > emulation type to add EMULTYPE_SKIP?
On Fri, Mar 6, 2026 at 9:54 AM Yosry Ahmed <yosry@kernel.org> wrote: > Actually, not quite. check_svme_pa() should keep injecting #GP, but > based on checking rax against kvm_host.maxphyaddr instead of the > hardcoded 0xffff000000000000ULL value. Shouldn't it check against the guest's maxphyaddr, in case allow_smaller_maxphyaddr is in use?
On Fri, Mar 6, 2026 at 2:15 PM Jim Mattson <jmattson@google.com> wrote: > > On Fri, Mar 6, 2026 at 9:54 AM Yosry Ahmed <yosry@kernel.org> wrote: > > Actually, not quite. check_svme_pa() should keep injecting #GP, but > > based on checking rax against kvm_host.maxphyaddr instead of the > > hardcoded 0xffff000000000000ULL value. > > Shouldn't it check against the guest's maxphyaddr, in case > allow_smaller_maxphyaddr is in use? Will respond to the other thread.
On Thu, Mar 5, 2026 at 5:39 PM Sean Christopherson <seanjc@google.com> wrote: > > On Thu, Mar 05, 2026, Jim Mattson wrote: > > On Thu, Mar 5, 2026 at 4:40 PM Yosry Ahmed <yosry@kernel.org> wrote: > > > > > > On Thu, Mar 5, 2026 at 4:05 PM Jim Mattson <jmattson@google.com> wrote: > > > > > > > > On Thu, Mar 5, 2026 at 2:52 PM Yosry Ahmed <yosry@kernel.org> wrote: > > > > > > > > > > On Thu, Mar 5, 2026 at 2:30 PM Jim Mattson <jmattson@google.com> wrote: > > > > > > > > > > > > On Mon, Mar 2, 2026 at 4:43 PM Yosry Ahmed <yosry@kernel.org> wrote: > > > > > > > > > > > > > > Add a test that verifies that KVM correctly injects a #GP for nested > > > > > > > VMRUN and a shutdown for nested #VMEXIT, if the GPA of vmcb12 cannot be > > > > > > > mapped. > > > > > > > > > > > > > > Signed-off-by: Yosry Ahmed <yosry@kernel.org> > > > > > > > ... > > > > > > > + /* > > > > > > > + * Find the max legal GPA that is not backed by a memslot (i.e. cannot > > > > > > > + * be mapped by KVM). > > > > > > > + */ > > > > > > > + maxphyaddr = kvm_cpuid_property(vcpu->cpuid, X86_PROPERTY_MAX_PHY_ADDR); > > > > > > > + max_legal_gpa = BIT_ULL(maxphyaddr) - PAGE_SIZE; > > > > > > > + vcpu_alloc_svm(vm, &nested_gva); > > > > > > > + vcpu_args_set(vcpu, 2, nested_gva, max_legal_gpa); > > > > > > > + > > > > > > > + /* VMRUN with max_legal_gpa, KVM injects a #GP */ > > > > > > > + vcpu_run(vcpu); > > > > > > > + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); > > > > > > > + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); > > > > > > > + TEST_ASSERT_EQ(uc.args[1], SYNC_GP); > > > > > > > > > > > > Why would this raise #GP? That isn't architected behavior. > > > > > > > > > > I don't see architected behavior in the APM for what happens if VMRUN > > > > > fails to load the VMCB from memory. I guess it should be the same as > > > > > what would happen if a PTE is pointing to a physical address that > > > > > doesn't exist? Maybe #MC? 
> > > > > > > > Reads from non-existent memory return all 1's > > > > > > Today I learned :) Do all x86 CPUs do this? > > > > Yes. If no device claims the address, reads return all 1s. I think you > > can thank pull-up resistors for that. > > Ya, it's officially documented PCI behavior. Writes are dropped, reads return > all 1s. LOL! PCI bus?!? These semantics were cast in stone long before anyone even dreamt of a PCI bus!
© 2016 - 2026 Red Hat, Inc.