From: Yang Weijiang <weijiang.yang@intel.com>
Don't emulate branch instructions, e.g. CALL/RET/JMP etc., that are
affected by Shadow Stacks and/or Indirect Branch Tracking when said
features are enabled in the guest, as fully emulating CET would require
significant complexity for no practical benefit (KVM shouldn't need to
emulate branch instructions on modern hosts). Simply doing nothing isn't
an option as that would allow a malicious entity to subvert CET
protections via the emulator.
Note! On far transfers, do NOT consult the current privilege level and
instead treat SHSTK/IBT as being enabled if they're enabled for User *or*
Supervisor mode. On inter-privilege level far transfers, SHSTK and IBT
can be in play for the target privilege level, i.e. checking the current
privilege could get a false negative, and KVM doesn't know the target
privilege level until emulation gets under way.
Suggested-by: Chao Gao <chao.gao@intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
Cc: Mathias Krause <minipli@grsecurity.net>
Cc: John Allen <john.allen@amd.com>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/emulate.c | 58 ++++++++++++++++++++++++++++++++++--------
1 file changed, 47 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 542d3664afa3..e4be54a677b0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -178,6 +178,8 @@
#define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
#define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */
#define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
+#define ShadowStack ((u64)1 << 57) /* Instruction protected by Shadow Stack. */
+#define IndirBrnTrk ((u64)1 << 58) /* Instruction protected by IBT. */
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
@@ -4068,9 +4070,9 @@ static const struct opcode group4[] = {
static const struct opcode group5[] = {
F(DstMem | SrcNone | Lock, em_inc),
F(DstMem | SrcNone | Lock, em_dec),
- I(SrcMem | NearBranch | IsBranch, em_call_near_abs),
- I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
- I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
+ I(SrcMem | NearBranch | IsBranch | ShadowStack | IndirBrnTrk, em_call_near_abs),
+ I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack | IndirBrnTrk, em_call_far),
+ I(SrcMem | NearBranch | IsBranch | IndirBrnTrk, em_jmp_abs),
I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
};
@@ -4332,11 +4334,11 @@ static const struct opcode opcode_table[256] = {
/* 0xC8 - 0xCF */
I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
I(Stack | IsBranch, em_leave),
- I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
- I(ImplicitOps | IsBranch, em_ret_far),
- D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
+ I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm),
+ I(ImplicitOps | IsBranch | ShadowStack, em_ret_far),
+ D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn),
D(ImplicitOps | No64 | IsBranch),
- II(ImplicitOps | IsBranch, em_iret, iret),
+ II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret),
/* 0xD0 - 0xD7 */
G(Src2One | ByteOp, group2), G(Src2One, group2),
G(Src2CL | ByteOp, group2), G(Src2CL, group2),
@@ -4352,7 +4354,7 @@ static const struct opcode opcode_table[256] = {
I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
/* 0xE8 - 0xEF */
- I(SrcImm | NearBranch | IsBranch, em_call),
+ I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
D(SrcImm | ImplicitOps | NearBranch | IsBranch),
I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
@@ -4371,7 +4373,7 @@ static const struct opcode opcode_table[256] = {
static const struct opcode twobyte_table[256] = {
/* 0x00 - 0x0F */
G(0, group6), GD(0, &group7), N, N,
- N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
+ N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk, em_syscall),
II(ImplicitOps | Priv, em_clts, clts), N,
DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
@@ -4402,8 +4404,8 @@ static const struct opcode twobyte_table[256] = {
IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
II(ImplicitOps | Priv, em_rdmsr, rdmsr),
IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
- I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
- I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
+ I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk, em_sysenter),
+ I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
N, N,
N, N, N, N, N, N, N, N,
/* 0x40 - 0x4F */
@@ -4941,6 +4943,40 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
if (ctxt->d == 0)
return EMULATION_FAILED;
+ /*
+ * Reject emulation if KVM might need to emulate shadow stack updates
+ * and/or indirect branch tracking enforcement, which the emulator
+ * doesn't support.
+ */
+ if (opcode.flags & (ShadowStack | IndirBrnTrk) &&
+ ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
+ u64 u_cet = 0, s_cet = 0;
+
+ /*
+ * Check both User and Supervisor on far transfers as inter-
+ * privilege level transfers are impacted by CET at the target
+ * privilege levels, and that is not known at this time. The
+ * expectation is that the guest will not require emulation
+ * of any CET-affected instructions at any privilege level.
+ */
+ if (!(opcode.flags & NearBranch))
+ u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
+ else if (ctxt->ops->cpl(ctxt) == 3)
+ u_cet = CET_SHSTK_EN | CET_ENDBR_EN;
+ else
+ s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
+
+ if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) ||
+ (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet)))
+ return EMULATION_FAILED;
+
+ if ((u_cet | s_cet) & CET_SHSTK_EN && opcode.flags & ShadowStack)
+ return EMULATION_FAILED;
+
+ if ((u_cet | s_cet) & CET_ENDBR_EN && opcode.flags & IndirBrnTrk)
+ return EMULATION_FAILED;
+ }
+
ctxt->execute = opcode.u.execute;
if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
--
2.51.0.384.g4c02a37b29-goog
On 9/13/2025 7:22 AM, Sean Christopherson wrote: > From: Yang Weijiang <weijiang.yang@intel.com> > > Don't emulate branch instructions, e.g. CALL/RET/JMP etc., that are > affected by Shadow Stacks and/or Indirect Branch Tracking when said > features are enabled in the guest, as fully emulating CET would require > significant complexity for no practical benefit (KVM shouldn't need to > emulate branch instructions on modern hosts). Simply doing nothing isn't > an option as that would allow a malicious entity to subvert CET > protections via the emulator. > > Note! On far transfers, do NOT consult the current privilege level and > instead treat SHSTK/IBT as being enabled if they're enabled for User *or* > Supervisor mode. On inter-privilege level far transfers, SHSTK and IBT > can be in play for the target privilege level, i.e. checking the current > privilege could get a false negative, and KVM doesn't know the target > privilege level until emulation gets under way. About the emulator, there is a VMX exit reason EXIT_REASON_TASK_SWITCH. The VM Exit triggers the following path: EXIT_REASON_TASK_SWITCH handle_task_switch kvm_task_switch emulator_task_switch According to SDM, in Vol 3 Chapter "Task Management", section "Executing a Task" "If shadow stack is enabled, then the SSP of the task is located at the 4 bytes at offset 104 in the 32-bit TSS and is used by the processor to establish the SSP when a task switch occurs from a task associated with this TSS. Note that the processor does not write the SSP of the task initiating the task switch to the TSS of that task, and instead the SSP of the previous task is pushed onto the shadow stack of the new task." This case is not covered, although using CET in 32-bit guests should be a corner case. 
> > Suggested-by: Chao Gao <chao.gao@intel.com> > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> > Cc: Mathias Krause <minipli@grsecurity.net> > Cc: John Allen <john.allen@amd.com> > Cc: Rick Edgecombe <rick.p.edgecombe@intel.com> > Signed-off-by: Chao Gao <chao.gao@intel.com> > Co-developed-by: Sean Christopherson <seanjc@google.com> > Signed-off-by: Sean Christopherson <seanjc@google.com> > --- > arch/x86/kvm/emulate.c | 58 ++++++++++++++++++++++++++++++++++-------- > 1 file changed, 47 insertions(+), 11 deletions(-) > > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index 542d3664afa3..e4be54a677b0 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -178,6 +178,8 @@ > #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ > #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */ > #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */ > +#define ShadowStack ((u64)1 << 57) /* Instruction protected by Shadow Stack. */ > +#define IndirBrnTrk ((u64)1 << 58) /* Instruction protected by IBT. 
*/ > > #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) > > @@ -4068,9 +4070,9 @@ static const struct opcode group4[] = { > static const struct opcode group5[] = { > F(DstMem | SrcNone | Lock, em_inc), > F(DstMem | SrcNone | Lock, em_dec), > - I(SrcMem | NearBranch | IsBranch, em_call_near_abs), > - I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far), > - I(SrcMem | NearBranch | IsBranch, em_jmp_abs), > + I(SrcMem | NearBranch | IsBranch | ShadowStack | IndirBrnTrk, em_call_near_abs), > + I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack | IndirBrnTrk, em_call_far), > + I(SrcMem | NearBranch | IsBranch | IndirBrnTrk, em_jmp_abs), > I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far), > I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined), > }; > @@ -4332,11 +4334,11 @@ static const struct opcode opcode_table[256] = { > /* 0xC8 - 0xCF */ > I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter), > I(Stack | IsBranch, em_leave), > - I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm), > - I(ImplicitOps | IsBranch, em_ret_far), > - D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn), > + I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm), > + I(ImplicitOps | IsBranch | ShadowStack, em_ret_far), > + D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn), > D(ImplicitOps | No64 | IsBranch), > - II(ImplicitOps | IsBranch, em_iret, iret), > + II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret), > /* 0xD0 - 0xD7 */ > G(Src2One | ByteOp, group2), G(Src2One, group2), > G(Src2CL | ByteOp, group2), G(Src2CL, group2), > @@ -4352,7 +4354,7 @@ static const struct opcode opcode_table[256] = { > I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), > I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), > /* 0xE8 - 0xEF */ > - I(SrcImm | NearBranch | IsBranch, em_call), > + I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call), > D(SrcImm | ImplicitOps | NearBranch | IsBranch), > I(SrcImmFAddr | No64 | IsBranch, 
em_jmp_far), > D(SrcImmByte | ImplicitOps | NearBranch | IsBranch), > @@ -4371,7 +4373,7 @@ static const struct opcode opcode_table[256] = { > static const struct opcode twobyte_table[256] = { > /* 0x00 - 0x0F */ > G(0, group6), GD(0, &group7), N, N, > - N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall), > + N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk, em_syscall), > II(ImplicitOps | Priv, em_clts, clts), N, > DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, > N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, > @@ -4402,8 +4404,8 @@ static const struct opcode twobyte_table[256] = { > IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), > II(ImplicitOps | Priv, em_rdmsr, rdmsr), > IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), > - I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter), > - I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit), > + I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk, em_sysenter), > + I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit), > N, N, > N, N, N, N, N, N, N, N, > /* 0x40 - 0x4F */ > @@ -4941,6 +4943,40 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int > if (ctxt->d == 0) > return EMULATION_FAILED; > > + /* > + * Reject emulation if KVM might need to emulate shadow stack updates > + * and/or indirect branch tracking enforcement, which the emulator > + * doesn't support. > + */ > + if (opcode.flags & (ShadowStack | IndirBrnTrk) && > + ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) { > + u64 u_cet = 0, s_cet = 0; > + > + /* > + * Check both User and Supervisor on far transfers as inter- > + * privilege level transfers are impacted by CET at the target > + * privilege levels, and that is not known at this time. The > + * the expectation is that the guest will not require emulation > + * of any CET-affected instructions at any privilege level. 
> + */ > + if (!(opcode.flags & NearBranch)) > + u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN; > + else if (ctxt->ops->cpl(ctxt) == 3) > + u_cet = CET_SHSTK_EN | CET_ENDBR_EN; > + else > + s_cet = CET_SHSTK_EN | CET_ENDBR_EN; > + > + if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) || > + (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))) > + return EMULATION_FAILED; > + > + if ((u_cet | s_cet) & CET_SHSTK_EN && opcode.flags & ShadowStack) > + return EMULATION_FAILED; > + > + if ((u_cet | s_cet) & CET_ENDBR_EN && opcode.flags & IndirBrnTrk) > + return EMULATION_FAILED; > + } > + > ctxt->execute = opcode.u.execute; > > if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
On 9/13/2025 7:22 AM, Sean Christopherson wrote: > From: Yang Weijiang <weijiang.yang@intel.com> > > Don't emulate branch instructions, e.g. CALL/RET/JMP etc., that are > affected by Shadow Stacks and/or Indirect Branch Tracking when said > features are enabled in the guest, as fully emulating CET would require > significant complexity for no practical benefit (KVM shouldn't need to > emulate branch instructions on modern hosts). Simply doing nothing isn't > an option as that would allow a malicious entity to subvert CET > protections via the emulator. > > Note! On far transfers, do NOT consult the current privilege level and > instead treat SHSTK/IBT as being enabled if they're enabled for User *or* > Supervisor mode. On inter-privilege level far transfers, SHSTK and IBT > can be in play for the target privilege level, i.e. checking the current > privilege could get a false negative, and KVM doesn't know the target > privilege level until emulation gets under way. > > Suggested-by: Chao Gao <chao.gao@intel.com> > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> > Cc: Mathias Krause <minipli@grsecurity.net> > Cc: John Allen <john.allen@amd.com> > Cc: Rick Edgecombe <rick.p.edgecombe@intel.com> > Signed-off-by: Chao Gao <chao.gao@intel.com> > Co-developed-by: Sean Christopherson <seanjc@google.com> > Signed-off-by: Sean Christopherson <seanjc@google.com> > --- > arch/x86/kvm/emulate.c | 58 ++++++++++++++++++++++++++++++++++-------- > 1 file changed, 47 insertions(+), 11 deletions(-) > > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index 542d3664afa3..e4be54a677b0 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -178,6 +178,8 @@ > #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ > #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */ > #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. 
*/ > +#define ShadowStack ((u64)1 << 57) /* Instruction protected by Shadow Stack. */ > +#define IndirBrnTrk ((u64)1 << 58) /* Instruction protected by IBT. */ > > #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) > > @@ -4068,9 +4070,9 @@ static const struct opcode group4[] = { > static const struct opcode group5[] = { > F(DstMem | SrcNone | Lock, em_inc), > F(DstMem | SrcNone | Lock, em_dec), > - I(SrcMem | NearBranch | IsBranch, em_call_near_abs), > - I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far), > - I(SrcMem | NearBranch | IsBranch, em_jmp_abs), > + I(SrcMem | NearBranch | IsBranch | ShadowStack | IndirBrnTrk, em_call_near_abs), > + I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack | IndirBrnTrk, em_call_far), > + I(SrcMem | NearBranch | IsBranch | IndirBrnTrk, em_jmp_abs), > I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far), It seems this entry for 'FF 05' (Jump far, absolute indirect) needs to set ShadowStack and IndirBrnTrk as well? > I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined), > }; > @@ -4332,11 +4334,11 @@ static const struct opcode opcode_table[256] = { > /* 0xC8 - 0xCF */ > I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter), > I(Stack | IsBranch, em_leave), > - I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm), > - I(ImplicitOps | IsBranch, em_ret_far), > - D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn), > + I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm), > + I(ImplicitOps | IsBranch | ShadowStack, em_ret_far), > + D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn), > D(ImplicitOps | No64 | IsBranch), > - II(ImplicitOps | IsBranch, em_iret, iret), > + II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret), > /* 0xD0 - 0xD7 */ > G(Src2One | ByteOp, group2), G(Src2One, group2), > G(Src2CL | ByteOp, group2), G(Src2CL, group2), > @@ -4352,7 +4354,7 @@ static const struct opcode opcode_table[256] = { > I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), > I2bvIP(SrcAcc | 
DstImmUByte, em_out, out, check_perm_out), > /* 0xE8 - 0xEF */ > - I(SrcImm | NearBranch | IsBranch, em_call), > + I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call), > D(SrcImm | ImplicitOps | NearBranch | IsBranch), > I(SrcImmFAddr | No64 | IsBranch, em_jmp_far), > D(SrcImmByte | ImplicitOps | NearBranch | IsBranch), > @@ -4371,7 +4373,7 @@ static const struct opcode opcode_table[256] = { > static const struct opcode twobyte_table[256] = { > /* 0x00 - 0x0F */ > G(0, group6), GD(0, &group7), N, N, > - N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall), > + N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk, em_syscall), > II(ImplicitOps | Priv, em_clts, clts), N, > DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, > N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, > @@ -4402,8 +4404,8 @@ static const struct opcode twobyte_table[256] = { > IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), > II(ImplicitOps | Priv, em_rdmsr, rdmsr), > IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), > - I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter), > - I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit), > + I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk, em_sysenter), > + I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit), > N, N, > N, N, N, N, N, N, N, N, > /* 0x40 - 0x4F */ > @@ -4941,6 +4943,40 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int > if (ctxt->d == 0) > return EMULATION_FAILED; > > + /* > + * Reject emulation if KVM might need to emulate shadow stack updates > + * and/or indirect branch tracking enforcement, which the emulator > + * doesn't support. 
> + */ > + if (opcode.flags & (ShadowStack | IndirBrnTrk) && > + ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) { > + u64 u_cet = 0, s_cet = 0; > + > + /* > + * Check both User and Supervisor on far transfers as inter- > + * privilege level transfers are impacted by CET at the target > + * privilege levels, and that is not known at this time. The > + * the expectation is that the guest will not require emulation > + * of any CET-affected instructions at any privilege level. > + */ > + if (!(opcode.flags & NearBranch)) > + u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN; > + else if (ctxt->ops->cpl(ctxt) == 3) > + u_cet = CET_SHSTK_EN | CET_ENDBR_EN; > + else > + s_cet = CET_SHSTK_EN | CET_ENDBR_EN; > + > + if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) || > + (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))) > + return EMULATION_FAILED; > + > + if ((u_cet | s_cet) & CET_SHSTK_EN && opcode.flags & ShadowStack) > + return EMULATION_FAILED; > + > + if ((u_cet | s_cet) & CET_ENDBR_EN && opcode.flags & IndirBrnTrk) > + return EMULATION_FAILED; > + } Other than the 'jmp far' case I pointed out above, I'm not sure whether any other instructions/cases that are protected by shadow stack or IBT have been missed. (I'm not really good at identifying all of them. Just identifying one case drains my energy.) At least, the part to return EMULATION_FAILED for the cases where shadow stack/IBT protection is needed looks good to me. So, for this part: Reviewed-by: Xiaoyao Li <xiaoyao.li@Intel.com> > ctxt->execute = opcode.u.execute; > > if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
>> >> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c >> index 542d3664afa3..e4be54a677b0 100644 >> --- a/arch/x86/kvm/emulate.c >> +++ b/arch/x86/kvm/emulate.c >> @@ -178,6 +178,8 @@ >> #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ >> #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */ >> #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */ >> +#define ShadowStack ((u64)1 << 57) /* Instruction protected by Shadow Stack. */ >> +#define IndirBrnTrk ((u64)1 << 58) /* Instruction protected by IBT. */ >> #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) >> @@ -4068,9 +4070,9 @@ static const struct opcode group4[] = { >> static const struct opcode group5[] = { >> F(DstMem | SrcNone | Lock, em_inc), >> F(DstMem | SrcNone | Lock, em_dec), >> - I(SrcMem | NearBranch | IsBranch, em_call_near_abs), >> - I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far), >> - I(SrcMem | NearBranch | IsBranch, em_jmp_abs), >> + I(SrcMem | NearBranch | IsBranch | ShadowStack | IndirBrnTrk, em_call_near_abs), >> + I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack | IndirBrnTrk, em_call_far), >> + I(SrcMem | NearBranch | IsBranch | IndirBrnTrk, em_jmp_abs), > >> I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far), > >It seems this entry for 'FF 05' (Jump far, absolute indirect) needs to set >ShadowStack and IndirBrnTrk as well? Yes. I just checked the pseudo code of the JMP instruction in SDM vol2. A far jump to a CONFORMING-CODE-SEGMENT or NONCONFORMING-CODE-SEGMENT is affected by both shadow stack and IBT, and a far jump to a call gate is affected by IBT.
On Thu, Sep 18, 2025, Chao Gao wrote: > >> > >> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > >> index 542d3664afa3..e4be54a677b0 100644 > >> --- a/arch/x86/kvm/emulate.c > >> +++ b/arch/x86/kvm/emulate.c > >> @@ -178,6 +178,8 @@ > >> #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ > >> #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */ > >> #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */ > >> +#define ShadowStack ((u64)1 << 57) /* Instruction protected by Shadow Stack. */ > >> +#define IndirBrnTrk ((u64)1 << 58) /* Instruction protected by IBT. */ > >> #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) > >> @@ -4068,9 +4070,9 @@ static const struct opcode group4[] = { > >> static const struct opcode group5[] = { > >> F(DstMem | SrcNone | Lock, em_inc), > >> F(DstMem | SrcNone | Lock, em_dec), > >> - I(SrcMem | NearBranch | IsBranch, em_call_near_abs), > >> - I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far), > >> - I(SrcMem | NearBranch | IsBranch, em_jmp_abs), > >> + I(SrcMem | NearBranch | IsBranch | ShadowStack | IndirBrnTrk, em_call_near_abs), > >> + I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack | IndirBrnTrk, em_call_far), > >> + I(SrcMem | NearBranch | IsBranch | IndirBrnTrk, em_jmp_abs), > > > >> I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far), > > > >It seems this entry for 'FF 05' (Jump far, absolute indirect) needs to set > >ShadowStack and IndirBrnTrk as well? > > Yes. I just checked the pseudo code of the JMP instruction in SDM vol2. A far > jump to a CONFORMING-CODE-SEGMENT or NONCONFORMING-CODE-SEGMENT is affected by > both shadow stack and IBT, and a far jump to a call gate is affected by IBT. The SHSTK interaction is only a #GP condition though, and it's not _that_ awful to emulate. While somewhat silly, I think it makes sense to reject FAR JMP for IBT, but implement the SHSTK check.
Rejecting a JMP instruction for SHSTK is weird/confusing (though definitely easier).
On Fri, Sep 12, 2025 at 04:22:56PM -0700, Sean Christopherson wrote: >From: Yang Weijiang <weijiang.yang@intel.com> > >Don't emulate branch instructions, e.g. CALL/RET/JMP etc., that are >affected by Shadow Stacks and/or Indirect Branch Tracking when said >features are enabled in the guest, as fully emulating CET would require >significant complexity for no practical benefit (KVM shouldn't need to >emulate branch instructions on modern hosts). Simply doing nothing isn't >an option as that would allow a malicious entity to subvert CET >protections via the emulator. > >Note! On far transfers, do NOT consult the current privilege level and >instead treat SHSTK/IBT as being enabled if they're enabled for User *or* >Supervisor mode. On inter-privilege level far transfers, SHSTK and IBT >can be in play for the target privilege level, i.e. checking the current >privilege could get a false negative, and KVM doesn't know the target >privilege level until emulation gets under way. I modified KUT's cet.c to verify that near jumps, near returns, and far transfers (e.g., IRET) trigger the emulation failure logic added by this patch when guests enable Shadow Stack or IBT. I found only one minor issue: near return instructions were not tagged with ShadowStack. 
The following diff fixes this issue: diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e4be54a677b0..b1c9816bd5c6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4326,8 +4326,8 @@ static const struct opcode opcode_table[256] = { X8(I(DstReg | SrcImm64 | Mov, em_mov)), /* 0xC0 - 0xC7 */ G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), - I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm), - I(ImplicitOps | NearBranch | IsBranch, em_ret), + I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm), + I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), G(ByteOp, group11), G(0, group11), And for reference, below are the changes I made to KUT's cet.c diff --git a/x86/cet.c b/x86/cet.c index 42d2b1fc..ff6b17f6 100644 --- a/x86/cet.c +++ b/x86/cet.c @@ -30,6 +30,8 @@ static u64 cet_shstk_func(void) */ printf("Try to temper the return-address, this causes #CP on returning...\n"); *(ret_addr + 1) = 0xdeaddead; + /* Verify that near return causes emulation failure */ + asm volatile (KVM_FEP "ret\n"); return 0; } @@ -45,7 +47,8 @@ static u64 cet_ibt_func(void) asm volatile ("movq $2, %rcx\n" "dec %rcx\n" "leaq 2f(%rip), %rax\n" - "jmp *%rax \n" + /* Verify that near jmp causes emulation failure */ + KVM_FEP "jmp *%rax \n" "2:\n" "dec %rcx\n"); return 0; @@ -111,6 +114,12 @@ int main(int ac, char **av) /* Enable CET master control bit in CR4. */ write_cr4(read_cr4() | X86_CR4_CET); + /* + * Verify "Far transfers" causes emulation failure even if shadow + * stack isn't enabled for the current privilege level + */ + asm volatile (KVM_FEP "iret\n"); + printf("Unit test for CET user mode...\n"); run_in_user((usermode_func)cet_shstk_func, GP_VECTOR, 0, 0, 0, 0, &rvc); report(cp_count == 1, "Completed shadow-stack protection test successfully.");
On Wed, Sep 17, 2025, Chao Gao wrote: > On Fri, Sep 12, 2025 at 04:22:56PM -0700, Sean Christopherson wrote: > >From: Yang Weijiang <weijiang.yang@intel.com> > > > >Don't emulate branch instructions, e.g. CALL/RET/JMP etc., that are > >affected by Shadow Stacks and/or Indirect Branch Tracking when said > >features are enabled in the guest, as fully emulating CET would require > >significant complexity for no practical benefit (KVM shouldn't need to > >emulate branch instructions on modern hosts). Simply doing nothing isn't > >an option as that would allow a malicious entity to subvert CET > >protections via the emulator. > > > >Note! On far transfers, do NOT consult the current privilege level and > >instead treat SHSTK/IBT as being enabled if they're enabled for User *or* > >Supervisor mode. On inter-privilege level far transfers, SHSTK and IBT > >can be in play for the target privilege level, i.e. checking the current > >privilege could get a false negative, and KVM doesn't know the target > >privilege level until emulation gets under way. > > I modified KUT's cet.c to verify that near jumps, near returns, and far > transfers (e.g., IRET) trigger the emulation failure logic added by this > patch when guests enable Shadow Stack or IBT. > > I found only one minor issue: near return instructions were not tagged with > ShadowStack. Heh, I had just found this through inspection. 
> The following diff fixes this issue: > > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index e4be54a677b0..b1c9816bd5c6 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -4326,8 +4326,8 @@ static const struct opcode opcode_table[256] = { > X8(I(DstReg | SrcImm64 | Mov, em_mov)), > /* 0xC0 - 0xC7 */ > G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), > - I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm), > - I(ImplicitOps | NearBranch | IsBranch, em_ret), > + I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm), > + I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret), Tangentially related to this bug, I think we should avoid manual annotation where possible. I don't see an easy way to do that for ShadowStack, but for IBT we can use IsBranch, NearBranch and the SrcXXX operands to detect IBT-affected instructions. It's obviously more complex, but programmatically detecting indirect branches should be less error-prone. I'll do so in the next version. > I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), > I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), > G(ByteOp, group11), G(0, group11), > > > And for reference, below are the changes I made to KUT's cet.c I now have a more comprehensive set of testcases, and it can be upstreamed (relies on KVM's default behavior of injecting #UD at CPL==3 on failed emulation).
>> --- a/arch/x86/kvm/emulate.c >> +++ b/arch/x86/kvm/emulate.c >> @@ -4326,8 +4326,8 @@ static const struct opcode opcode_table[256] = { >> X8(I(DstReg | SrcImm64 | Mov, em_mov)), >> /* 0xC0 - 0xC7 */ >> G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), >> - I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm), >> - I(ImplicitOps | NearBranch | IsBranch, em_ret), >> + I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm), >> + I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret), > >Tangentially directly related to this bug, I think we should manual annotation >where possible. I don't see an easy way to do that for ShadowStack, but for IBT >we can use IsBranch, NearBranch and the SrcXXX operance to detect IBT-affected >instructions. It's obviously more complex, but programmatically detecting >indirect branches should be less error prone. I'll do so in the next version. > >> I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), >> I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), >> G(ByteOp, group11), G(0, group11), >> >> >> And for reference, below are the changes I made to KUT's cet.c > >I now have a more comprehensive set of testcases, and it can be upstreamed >(relies on KVM's default behavior of injecting #UD at CPL==3 on failed emulation). IIUC, for KVM_FEP-prefixed instructions, the emulation type is set to EMULTYPE_TRAP_UD_FORCED. Regardless of the CPL and KVM_CAP_EXIT_ON_EMULATION_FAILURE, KVM will always inject #UD on failed emulation. r = x86_decode_emulated_instruction(vcpu, emulation_type, insn, insn_len); if (r != EMULATION_OK) { if ((emulation_type & EMULTYPE_TRAP_UD) || (emulation_type & EMULTYPE_TRAP_UD_FORCED)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; }
On Thu, Sep 18, 2025, Chao Gao wrote: > >> --- a/arch/x86/kvm/emulate.c > >> +++ b/arch/x86/kvm/emulate.c > >> @@ -4326,8 +4326,8 @@ static const struct opcode opcode_table[256] = { > >> X8(I(DstReg | SrcImm64 | Mov, em_mov)), > >> /* 0xC0 - 0xC7 */ > >> G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), > >> - I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm), > >> - I(ImplicitOps | NearBranch | IsBranch, em_ret), > >> + I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm), > >> + I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret), > > > >Tangentially directly related to this bug, I think we should manual annotation > >where possible. I don't see an easy way to do that for ShadowStack, but for IBT > >we can use IsBranch, NearBranch and the SrcXXX operance to detect IBT-affected > >instructions. It's obviously more complex, but programmatically detecting > >indirect branches should be less error prone. I'll do so in the next version. > > > >> I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), > >> I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), > >> G(ByteOp, group11), G(0, group11), > >> > >> > >> And for reference, below are the changes I made to KUT's cet.c > > > >I now have a more comprehensive set of testcases, and it can be upstreamed > >(relies on KVM's default behavior of injecting #UD at CPL==3 on failed emulation). > > IIUC, for KVM_FEP-prefixed instructions, the emulation type is set to > EMULTYPE_TRAP_UD_FORCED. Regardless of the CPL and > KVM_CAP_EXIT_ON_EMULATION_FAILURE, KVM will always inject #UD on failed > emulation. > > r = x86_decode_emulated_instruction(vcpu, emulation_type, > insn, insn_len); > if (r != EMULATION_OK) { > if ((emulation_type & EMULTYPE_TRAP_UD) || > (emulation_type & EMULTYPE_TRAP_UD_FORCED)) { > kvm_queue_exception(vcpu, UD_VECTOR); > return 1; > } Oh, right. Nice, that makes the KUT testcase much less hacky :-)
© 2016 - 2025 Red Hat, Inc.