The AMD APM states that if VMMCALL instruction is not intercepted, the
instruction raises a #UD exception.
Create a vmmcall exit handler that generates a #UD if a VMMCALL exit
from L2 is being handled by L0, which means that L1 did not intercept
the VMMCALL instruction. The exception to this is if the exiting
instruction was for Hyper-V L2 TLB flush hypercalls as they are handled
by L0.
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Kevin Cheng <chengkev@google.com>
---
arch/x86/kvm/svm/hyperv.h | 11 +++++++++++
arch/x86/kvm/svm/nested.c | 4 +---
arch/x86/kvm/svm/svm.c | 19 ++++++++++++++++++-
3 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h
index d3f8bfc05832e..9af03970d40c2 100644
--- a/arch/x86/kvm/svm/hyperv.h
+++ b/arch/x86/kvm/svm/hyperv.h
@@ -41,6 +41,13 @@ static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
return hv_vcpu->vp_assist_page.nested_control.features.directhypercall;
}
+static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+{
+ return guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
+ nested_svm_l2_tlb_flush_enabled(vcpu) &&
+ kvm_hv_is_tlb_flush_hcall(vcpu);
+}
+
void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
#else /* CONFIG_KVM_HYPERV */
static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {}
@@ -48,6 +55,10 @@ static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
{
return false;
}
+static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
static inline void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) {}
#endif /* CONFIG_KVM_HYPERV */
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index de90b104a0dd5..45d1496031a74 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1674,9 +1674,7 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
}
case SVM_EXIT_VMMCALL:
/* Hyper-V L2 TLB flush hypercall is handled by L0 */
- if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
- nested_svm_l2_tlb_flush_enabled(vcpu) &&
- kvm_hv_is_tlb_flush_hcall(vcpu))
+ if (nested_svm_is_l2_tlb_flush_hcall(vcpu))
return NESTED_EXIT_HOST;
break;
default:
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f8f9b7a124c36..d662d5ce986ac 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -52,6 +52,7 @@
#include "svm.h"
#include "svm_ops.h"
+#include "hyperv.h"
#include "kvm_onhyperv.h"
#include "svm_onhyperv.h"
@@ -3258,6 +3259,22 @@ static int bus_lock_exit(struct kvm_vcpu *vcpu)
return 0;
}
+static int vmmcall_interception(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Per the AMD APM, VMMCALL raises #UD if the VMMCALL intercept
+ * is not set. For an L2 guest, inject #UD as L1 did not intercept
+ * VMMCALL, except for Hyper-V L2 TLB flush hypercalls as they
+ * are handled by L0.
+ */
+ if (is_guest_mode(vcpu) && !nested_svm_is_l2_tlb_flush_hcall(vcpu)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ return kvm_emulate_hypercall(vcpu);
+}
+
static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_READ_CR0] = cr_interception,
[SVM_EXIT_READ_CR3] = cr_interception,
@@ -3308,7 +3325,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
[SVM_EXIT_VMRUN] = vmrun_interception,
- [SVM_EXIT_VMMCALL] = kvm_emulate_hypercall,
+ [SVM_EXIT_VMMCALL] = vmmcall_interception,
[SVM_EXIT_VMLOAD] = vmload_interception,
[SVM_EXIT_VMSAVE] = vmsave_interception,
[SVM_EXIT_STGI] = stgi_interception,
--
2.53.0.473.g4a7958ca14-goog
+Vitaly
On Sat, Feb 28, 2026, Kevin Cheng wrote:
> The AMD APM states that if VMMCALL instruction is not intercepted, the
> instruction raises a #UD exception.
>
> Create a vmmcall exit handler that generates a #UD if a VMMCALL exit
> from L2 is being handled by L0, which means that L1 did not intercept
> the VMMCALL instruction. The exception to this is if the exiting
> instruction was for Hyper-V L2 TLB flush hypercalls as they are handled
> by L0.
*sigh*
Except this changelog doesn't capture *any* of the subtlety. And were it not for
an internal bug discussion, I would have literally no clue WTF is going on.
There's no generic missed #UD bug, because this code in recalc_intercepts()
effectively disables the VMMCALL intercept in vmcb02 if the intercept isn't set
in vmcb12.
/*
* We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
* flush feature is enabled.
*/
if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
I.e. the only bug *knowingly* being fixed, maybe, is an edge case where Hyper-V
TLB flushes are enabled for L2 and the hypercall is something other than one of
the blessed Hyper-V hypercalls. But in that case, it's not at all clear to me
that synthesizing a #UD into L2 is correct. I can't find anything in the TLFS
(not surprising), so I guess anything goes?
Vitaly,
The scenario in question is where HV_X64_NESTED_DIRECT_FLUSH is enabled, L1 doesn't
intercept VMMCALL, and L2 executes VMMCALL with something other than one of the
Hyper-V TLB flush hypercalls. The proposed change is to synthesize #UD (which
is what happens if HV_X64_NESTED_DIRECT_FLUSH isn't enable). Does that sound
sane? Should KVM instead return an error.
As for bugs that are *unknowingly* being fixed, intercepting VMMCALL and manually
injecting a #UD effectively fixes a bad interaction with KVM's asinine
KVM_X86_QUIRK_FIX_HYPERCALL_INSN. If KVM doesn't intercept VMMCALL while L2
is active (L1 doesn't want to intercept VMMCALL and the Hyper-V L2 TLB flush
hypercall is disabled), then L2 will hang on the VMMCALL as KVM will intercept
the #UD, then "emulate" VMMCALL by trying to fixup the opcode and restarting the
instruction.
That can be "fixed" by disabling the quirk, or by hacking the fixup like so:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index db3f393192d9..3f6d9950f8f8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10506,17 +10506,22 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
* If the quirk is disabled, synthesize a #UD and let the guest pick up
* the pieces.
*/
- if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_FIX_HYPERCALL_INSN)) {
- ctxt->exception.error_code_valid = false;
- ctxt->exception.vector = UD_VECTOR;
- ctxt->have_exception = true;
- return X86EMUL_PROPAGATE_FAULT;
- }
+ if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_FIX_HYPERCALL_INSN))
+ goto inject_ud;
kvm_x86_call(patch_hypercall)(vcpu, instruction);
+ if (is_guest_mode(vcpu) && !memcmp(instruction, ctxt->fetch.data, 3))
+ goto inject_ud;
+
return emulator_write_emulated(ctxt, rip, instruction, 3,
&ctxt->exception);
+
+inject_ud:
+ ctxt->exception.error_code_valid = false;
+ ctxt->exception.vector = UD_VECTOR;
+ ctxt->have_exception = true;
+ return X86EMUL_PROPAGATE_FAULT;
}
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
--
But that's extremely convoluted for no purpose that I can see. Not intercepting
VMMCALL requires _more_ code and is overall more complex.
So unless I'm missing something, I'm going to tack on this to fix the L2 infinite
loop, and then figure out what to do about Hyper-V, pending Vitaly's input.
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 45d1496031a7..a55af647649c 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -156,13 +156,6 @@ void recalc_intercepts(struct vcpu_svm *svm)
vmcb_clr_intercept(c, INTERCEPT_VINTR);
}
- /*
- * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
- * flush feature is enabled.
- */
- if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
- vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
-
for (i = 0; i < MAX_INTERCEPT; i++)
c->intercepts[i] |= g->intercepts[i];
Sean Christopherson <seanjc@google.com> writes:
> +Vitaly
>
> On Sat, Feb 28, 2026, Kevin Cheng wrote:
>> The AMD APM states that if VMMCALL instruction is not intercepted, the
>> instruction raises a #UD exception.
>>
>> Create a vmmcall exit handler that generates a #UD if a VMMCALL exit
>> from L2 is being handled by L0, which means that L1 did not intercept
>> the VMMCALL instruction. The exception to this is if the exiting
>> instruction was for Hyper-V L2 TLB flush hypercalls as they are handled
>> by L0.
>
> *sigh*
>
> Except this changelog doesn't capture *any* of the subtlety. And were it not for
> an internal bug discussion, I would have literally no clue WTF is going on.
>
> There's no generic missed #UD bug, because this code in recalc_intercepts()
> effectively disables the VMMCALL intercept in vmcb02 if the intercept isn't set
> in vmcb12.
>
> /*
> * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
> * flush feature is enabled.
> */
> if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
> vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
>
> I.e. the only bug *knowingly* being fixed, maybe, is an edge case where Hyper-V
> TLB flushes are enabled for L2 and the hypercall is something other than one of
> the blessed Hyper-V hypercalls. But in that case, it's not at all clear to me
> that synthesizing a #UD into L2 is correct. I can't find anything in the TLFS
> (not surprising), so I guess anything goes?
>
> Vitaly,
>
> The scenario in question is where HV_X64_NESTED_DIRECT_FLUSH is enabled, L1 doesn't
> intercept VMMCALL, and L2 executes VMMCALL with something other than one of the
> Hyper-V TLB flush hypercalls. The proposed change is to synthesize #UD (which
> is what happens if HV_X64_NESTED_DIRECT_FLUSH isn't enabled). Does that sound
> sane? Should KVM instead return an error?
I think this does sound sane. In the situation when the hypercall
issued by L2 is not a TLB flush hypercall, I believe the behavior should
be exactly the same whether HV_X64_NESTED_DIRECT_FLUSH is enabled or
not.
Also, I'm tempted to say that L1 not intercepting VMMCALL and at the
same time using extended features like HV_X64_NESTED_DIRECT_FLUSH can be
an unsupported combo and we can just refuse to run L2 or crash L1 for
misbehaving but I'm afraid this can backfire. E.g. when Hyper-V is
shutting down or in some other 'special' situation.
>
> As for bugs that are *unknowingly* being fixed, intercepting VMMCALL and manually
> injecting a #UD effectively fixes a bad interaction with KVM's asinine
> KVM_X86_QUIRK_FIX_HYPERCALL_INSN. If KVM doesn't intercept VMMCALL while L2
> is active (L1 doesn't want to intercept VMMCALL and the Hyper-V L2 TLB flush
> hypercall is disabled), then L2 will hang on the VMMCALL as KVM will intercept
> the #UD, then "emulate" VMMCALL by trying to fixup the opcode and restarting the
> instruction.
>
> That can be "fixed" by disabling the quirk, or by hacking the fixup like so:
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index db3f393192d9..3f6d9950f8f8 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10506,17 +10506,22 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
> * If the quirk is disabled, synthesize a #UD and let the guest pick up
> * the pieces.
> */
> - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_FIX_HYPERCALL_INSN)) {
> - ctxt->exception.error_code_valid = false;
> - ctxt->exception.vector = UD_VECTOR;
> - ctxt->have_exception = true;
> - return X86EMUL_PROPAGATE_FAULT;
> - }
> + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_FIX_HYPERCALL_INSN))
> + goto inject_ud;
>
> kvm_x86_call(patch_hypercall)(vcpu, instruction);
>
> + if (is_guest_mode(vcpu) && !memcmp(instruction, ctxt->fetch.data, 3))
> + goto inject_ud;
> +
> return emulator_write_emulated(ctxt, rip, instruction, 3,
> &ctxt->exception);
> +
> +inject_ud:
> + ctxt->exception.error_code_valid = false;
> + ctxt->exception.vector = UD_VECTOR;
> + ctxt->have_exception = true;
> + return X86EMUL_PROPAGATE_FAULT;
> }
>
> static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
> --
>
> But that's extremely convoluted for no purpose that I can see. Not intercepting
> VMMCALL requires _more_ code and is overall more complex.
>
> So unless I'm missing something, I'm going to tack on this to fix the L2 infinite
> loop, and then figure out what to do about Hyper-V, pending Vitaly's input.
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 45d1496031a7..a55af647649c 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -156,13 +156,6 @@ void recalc_intercepts(struct vcpu_svm *svm)
> vmcb_clr_intercept(c, INTERCEPT_VINTR);
> }
>
> - /*
> - * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
> - * flush feature is enabled.
> - */
> - if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
> - vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
> -
> for (i = 0; i < MAX_INTERCEPT; i++)
> c->intercepts[i] |= g->intercepts[i];
>
>
--
Vitaly
© 2016 - 2026 Red Hat, Inc.