[PATCH v14 15/22] KVM: x86: Don't emulate instructions guarded by CET

Chao Gao posted 22 patches 3 weeks, 2 days ago
[PATCH v14 15/22] KVM: x86: Don't emulate instructions guarded by CET
Posted by Chao Gao 3 weeks, 2 days ago
From: Yang Weijiang <weijiang.yang@intel.com>

Don't emulate the branch instructions, e.g., CALL/RET/JMP etc., when CET
is active in guest, return KVM_INTERNAL_ERROR_EMULATION to userspace to
handle it.

KVM doesn't emulate CPU behaviors to check CET protected stuffs while
emulating guest instructions, instead it stops emulation on detecting
the instructions in process are CET protected. By doing so, it can avoid
generating bogus #CP in guest and preventing CET protected execution flow
subversion from guest side.

Suggested-by: Chao Gao <chao.gao@intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
Tested-by: Mathias Krause <minipli@grsecurity.net>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
---
 arch/x86/kvm/emulate.c | 46 ++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 542d3664afa3..97a4d1e69583 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -178,6 +178,8 @@
 #define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
 #define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operand */
 #define IsBranch    ((u64)1 << 56)  /* Instruction is considered a branch. */
+#define ShadowStack ((u64)1 << 57)  /* Instruction protected by Shadow Stack. */
+#define IndirBrnTrk ((u64)1 << 58)  /* Instruction protected by IBT. */
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -4068,9 +4070,11 @@ static const struct opcode group4[] = {
 static const struct opcode group5[] = {
 	F(DstMem | SrcNone | Lock,		em_inc),
 	F(DstMem | SrcNone | Lock,		em_dec),
-	I(SrcMem | NearBranch | IsBranch,       em_call_near_abs),
-	I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
-	I(SrcMem | NearBranch | IsBranch,       em_jmp_abs),
+	I(SrcMem | NearBranch | IsBranch | ShadowStack | IndirBrnTrk,
+	em_call_near_abs),
+	I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack | IndirBrnTrk,
+	em_call_far),
+	I(SrcMem | NearBranch | IsBranch | IndirBrnTrk, em_jmp_abs),
 	I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
 	I(SrcMem | Stack | TwoMemOp,		em_push), D(Undefined),
 };
@@ -4332,11 +4336,11 @@ static const struct opcode opcode_table[256] = {
 	/* 0xC8 - 0xCF */
 	I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
 	I(Stack | IsBranch, em_leave),
-	I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
-	I(ImplicitOps | IsBranch, em_ret_far),
-	D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
+	I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm),
+	I(ImplicitOps | IsBranch | ShadowStack, em_ret_far),
+	D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn),
 	D(ImplicitOps | No64 | IsBranch),
-	II(ImplicitOps | IsBranch, em_iret, iret),
+	II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret),
 	/* 0xD0 - 0xD7 */
 	G(Src2One | ByteOp, group2), G(Src2One, group2),
 	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
@@ -4352,7 +4356,7 @@ static const struct opcode opcode_table[256] = {
 	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
 	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
 	/* 0xE8 - 0xEF */
-	I(SrcImm | NearBranch | IsBranch, em_call),
+	I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
 	D(SrcImm | ImplicitOps | NearBranch | IsBranch),
 	I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
 	D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
@@ -4371,7 +4375,8 @@ static const struct opcode opcode_table[256] = {
 static const struct opcode twobyte_table[256] = {
 	/* 0x00 - 0x0F */
 	G(0, group6), GD(0, &group7), N, N,
-	N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
+	N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk,
+	em_syscall),
 	II(ImplicitOps | Priv, em_clts, clts), N,
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
 	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
@@ -4402,8 +4407,9 @@ static const struct opcode twobyte_table[256] = {
 	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
 	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
 	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
-	I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
-	I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
+	I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk,
+	em_sysenter),
+	I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
 	N, N,
 	N, N, N, N, N, N, N, N,
 	/* 0x40 - 0x4F */
@@ -4941,6 +4947,24 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 	if (ctxt->d == 0)
 		return EMULATION_FAILED;
 
+	if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
+		u64 u_cet, s_cet;
+		bool stop_em;
+
+		if (ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet) ||
+		    ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))
+			return EMULATION_FAILED;
+
+		stop_em = ((u_cet & CET_SHSTK_EN) || (s_cet & CET_SHSTK_EN)) &&
+			  (opcode.flags & ShadowStack);
+
+		stop_em |= ((u_cet & CET_ENDBR_EN) || (s_cet & CET_ENDBR_EN)) &&
+			   (opcode.flags & IndirBrnTrk);
+
+		if (stop_em)
+			return EMULATION_FAILED;
+	}
+
 	ctxt->execute = opcode.u.execute;
 
 	if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
-- 
2.47.3
Re: [PATCH v14 15/22] KVM: x86: Don't emulate instructions guarded by CET
Posted by Sean Christopherson 2 weeks, 6 days ago
On Tue, Sep 09, 2025, Chao Gao wrote:
> @@ -4068,9 +4070,11 @@ static const struct opcode group4[] = {
>  static const struct opcode group5[] = {
>  	F(DstMem | SrcNone | Lock,		em_inc),
>  	F(DstMem | SrcNone | Lock,		em_dec),
> -	I(SrcMem | NearBranch | IsBranch,       em_call_near_abs),
> -	I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
> -	I(SrcMem | NearBranch | IsBranch,       em_jmp_abs),
> +	I(SrcMem | NearBranch | IsBranch | ShadowStack | IndirBrnTrk,
> +	em_call_near_abs),

Argh, these wraps are killing me.  I spent a good 20 seconds staring at the code
trying to figure out which instructions are affected.  There's definitely a bit
of -ENOCOFFEE going on, but there's also zero reason to wrap.

> +	I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack | IndirBrnTrk,
> +	em_call_far),
> +	I(SrcMem | NearBranch | IsBranch | IndirBrnTrk, em_jmp_abs),
>  	I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
>  	I(SrcMem | Stack | TwoMemOp,		em_push), D(Undefined),
>  };
Re: [PATCH v14 15/22] KVM: x86: Don't emulate instructions guarded by CET
Posted by Xiaoyao Li 3 weeks ago
On 9/9/2025 5:39 PM, Chao Gao wrote:
> From: Yang Weijiang <weijiang.yang@intel.com>
> 
> Don't emulate the branch instructions, e.g., CALL/RET/JMP etc., when CET
> is active in guest, return KVM_INTERNAL_ERROR_EMULATION to userspace to
> handle it.
> 
> KVM doesn't emulate CPU behaviors to check CET protected stuffs while
> emulating guest instructions, instead it stops emulation on detecting
> the instructions in process are CET protected. By doing so, it can avoid
> generating bogus #CP in guest and preventing CET protected execution flow
> subversion from guest side.
> 
> Suggested-by: Chao Gao <chao.gao@intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> Tested-by: Mathias Krause <minipli@grsecurity.net>
> Tested-by: John Allen <john.allen@amd.com>
> Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> ---
>   arch/x86/kvm/emulate.c | 46 ++++++++++++++++++++++++++++++++----------
>   1 file changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 542d3664afa3..97a4d1e69583 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -178,6 +178,8 @@
>   #define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
>   #define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operand */
>   #define IsBranch    ((u64)1 << 56)  /* Instruction is considered a branch. */
> +#define ShadowStack ((u64)1 << 57)  /* Instruction protected by Shadow Stack. */
> +#define IndirBrnTrk ((u64)1 << 58)  /* Instruction protected by IBT. */
>   
>   #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
>   
> @@ -4068,9 +4070,11 @@ static const struct opcode group4[] = {
>   static const struct opcode group5[] = {
>   	F(DstMem | SrcNone | Lock,		em_inc),
>   	F(DstMem | SrcNone | Lock,		em_dec),
> -	I(SrcMem | NearBranch | IsBranch,       em_call_near_abs),
> -	I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
> -	I(SrcMem | NearBranch | IsBranch,       em_jmp_abs),
> +	I(SrcMem | NearBranch | IsBranch | ShadowStack | IndirBrnTrk,
> +	em_call_near_abs),
> +	I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack | IndirBrnTrk,
> +	em_call_far),
> +	I(SrcMem | NearBranch | IsBranch | IndirBrnTrk, em_jmp_abs),
>   	I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
>   	I(SrcMem | Stack | TwoMemOp,		em_push), D(Undefined),
>   };
> @@ -4332,11 +4336,11 @@ static const struct opcode opcode_table[256] = {
>   	/* 0xC8 - 0xCF */
>   	I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
>   	I(Stack | IsBranch, em_leave),
> -	I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
> -	I(ImplicitOps | IsBranch, em_ret_far),
> -	D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
> +	I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm),
> +	I(ImplicitOps | IsBranch | ShadowStack, em_ret_far),
> +	D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn),
>   	D(ImplicitOps | No64 | IsBranch),
> -	II(ImplicitOps | IsBranch, em_iret, iret),
> +	II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret),
>   	/* 0xD0 - 0xD7 */
>   	G(Src2One | ByteOp, group2), G(Src2One, group2),
>   	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
> @@ -4352,7 +4356,7 @@ static const struct opcode opcode_table[256] = {
>   	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
>   	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
>   	/* 0xE8 - 0xEF */
> -	I(SrcImm | NearBranch | IsBranch, em_call),
> +	I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
>   	D(SrcImm | ImplicitOps | NearBranch | IsBranch),
>   	I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
>   	D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
> @@ -4371,7 +4375,8 @@ static const struct opcode opcode_table[256] = {
>   static const struct opcode twobyte_table[256] = {
>   	/* 0x00 - 0x0F */
>   	G(0, group6), GD(0, &group7), N, N,
> -	N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
> +	N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk,
> +	em_syscall),
>   	II(ImplicitOps | Priv, em_clts, clts), N,
>   	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
>   	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
> @@ -4402,8 +4407,9 @@ static const struct opcode twobyte_table[256] = {
>   	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
>   	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
>   	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
> -	I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
> -	I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
> +	I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | IndirBrnTrk,
> +	em_sysenter),
> +	I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
>   	N, N,
>   	N, N, N, N, N, N, N, N,
>   	/* 0x40 - 0x4F */
> @@ -4941,6 +4947,24 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
>   	if (ctxt->d == 0)
>   		return EMULATION_FAILED;
>   
> +	if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
> +		u64 u_cet, s_cet;
> +		bool stop_em;
> +
> +		if (ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet) ||
> +		    ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))
> +			return EMULATION_FAILED;
> +
> +		stop_em = ((u_cet & CET_SHSTK_EN) || (s_cet & CET_SHSTK_EN)) &&
> +			  (opcode.flags & ShadowStack);
> +
> +		stop_em |= ((u_cet & CET_ENDBR_EN) || (s_cet & CET_ENDBR_EN)) &&
> +			   (opcode.flags & IndirBrnTrk);

Why don't check CPL here? Just for simplicity?

> +		if (stop_em)
> +			return EMULATION_FAILED;
> +	}
> +
>   	ctxt->execute = opcode.u.execute;
>   
>   	if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
Re: [PATCH v14 15/22] KVM: x86: Don't emulate instructions guarded by CET
Posted by Chao Gao 3 weeks ago
>> @@ -4941,6 +4947,24 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
>>   	if (ctxt->d == 0)
>>   		return EMULATION_FAILED;
>> +	if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
>> +		u64 u_cet, s_cet;
>> +		bool stop_em;
>> +
>> +		if (ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet) ||
>> +		    ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))
>> +			return EMULATION_FAILED;
>> +
>> +		stop_em = ((u_cet & CET_SHSTK_EN) || (s_cet & CET_SHSTK_EN)) &&
>> +			  (opcode.flags & ShadowStack);
>> +
>> +		stop_em |= ((u_cet & CET_ENDBR_EN) || (s_cet & CET_ENDBR_EN)) &&
>> +			   (opcode.flags & IndirBrnTrk);
>
>Why don't check CPL here? Just for simplicity?

I think so. This is a corner case and we don't want to make it very precise
(and thus complex). The reason is that no one had a strong opinion on whether
to do the CPL check or not. I asked the same question before [*], but I don't
have a strong opinion on this either.

[*]: https://lore.kernel.org/kvm/ZaSQn7RCRTaBK1bc@chao-email/
Re: [PATCH v14 15/22] KVM: x86: Don't emulate instructions guarded by CET
Posted by Xiaoyao Li 2 weeks, 6 days ago
On 9/11/2025 6:42 PM, Chao Gao wrote:
>>> @@ -4941,6 +4947,24 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
>>>    	if (ctxt->d == 0)
>>>    		return EMULATION_FAILED;
>>> +	if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
>>> +		u64 u_cet, s_cet;
>>> +		bool stop_em;
>>> +
>>> +		if (ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet) ||
>>> +		    ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))
>>> +			return EMULATION_FAILED;
>>> +
>>> +		stop_em = ((u_cet & CET_SHSTK_EN) || (s_cet & CET_SHSTK_EN)) &&
>>> +			  (opcode.flags & ShadowStack);
>>> +
>>> +		stop_em |= ((u_cet & CET_ENDBR_EN) || (s_cet & CET_ENDBR_EN)) &&
>>> +			   (opcode.flags & IndirBrnTrk);
>>
>> Why don't check CPL here? Just for simplicity?
> 
> I think so. This is a corner case and we don't want to make it very precise
> (and thus complex). The reason is that no one had a strong opinion on whether
> to do the CPL check or not. I asked the same question before [*], but I don't
> have a strong opinion on this either.

I'm OK with it.

But I think we should at least mention it in the change log. So people 
will know that CPL check is skipped intentionally and maintainers are OK 
with it so the patch was merged, when they dig the history.

> [*]: https://lore.kernel.org/kvm/ZaSQn7RCRTaBK1bc@chao-email/
Re: [PATCH v14 15/22] KVM: x86: Don't emulate instructions guarded by CET
Posted by Sean Christopherson 2 weeks, 6 days ago
On Fri, Sep 12, 2025, Xiaoyao Li wrote:
> On 9/11/2025 6:42 PM, Chao Gao wrote:
> > > > @@ -4941,6 +4947,24 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
> > > >    	if (ctxt->d == 0)
> > > >    		return EMULATION_FAILED;
> > > > +	if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
> > > > +		u64 u_cet, s_cet;
> > > > +		bool stop_em;
> > > > +
> > > > +		if (ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet) ||
> > > > +		    ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))
> > > > +			return EMULATION_FAILED;
> > > > +
> > > > +		stop_em = ((u_cet & CET_SHSTK_EN) || (s_cet & CET_SHSTK_EN)) &&
> > > > +			  (opcode.flags & ShadowStack);
> > > > +
> > > > +		stop_em |= ((u_cet & CET_ENDBR_EN) || (s_cet & CET_ENDBR_EN)) &&
> > > > +			   (opcode.flags & IndirBrnTrk);
> > > 
> > > Why don't check CPL here? Just for simplicity?
> > 
> > I think so. This is a corner case and we don't want to make it very precise

Checking CPL here would not make the code more complex, e.g. naively it could be
something like:

		u64 cet;
		int r;

		if (ctxt->ops->cpl(ctxt) == 3)
			r = ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &cet);
		else
			r = ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &cet);
		if (r)
			return EMULATION_FAILED;

		if (cet & CET_SHSTK_EN && opcode.flags & ShadowStack)
			  return EMULATION_FAILED;

		if (cet & CET_ENDBR_EN && opcode.flags & IndirBrnTrk)
			  return EMULATION_FAILED;

> > (and thus complex). The reason is that no one had a strong opinion on whether
> > to do the CPL check or not. I asked the same question before [*], but I don't
> > have a strong opinion on this either.
> 
> I'm OK with it.

I have a strong opinion.  :-)

KVM must NOT check CPL, because inter-privilege level transfers could trigger
CET emulation and both levels.  E.g. a FAR CALL will be affected by both shadow
stacks and IBT at the target privilege level.

So this need more than just a changelog blurb, it needs a comment.  The code
can also be cleaned up and optimized.  Reading CR4 and two MSRs (via indirect
calls, i.e. potential retpolines) is wasteful for the vast majority of instructions,
and gathering "stop emulation" into a local variable when a positive test is fatal
is pointless.

	/*
	 * Reject emulation if KVM might need to emulate shadow stack updates
	 * and/or indirect branch tracking enforcement, which the emulator
	 * doesn't support.  Deliberately don't check CPL as inter-privilege
	 * level transfers can trigger emulation at both privilege levels, and
	 * the expectation is that the guest will not require emulation of any
	 * CET-affected instructions at any privilege level.
	 */
	if (opcode.flags & (ShadowStack | IndirBrnTrk) &&
	    ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
		u64 u_cet, s_cet;

		if (ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet) ||
		    ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))
			return EMULATION_FAILED;

		if ((u_cet | s_cet) & CET_SHSTK_EN && opcode.flags & ShadowStack)
			  return EMULATION_FAILED;

		if ((u_cet | s_cet) & CET_ENDBR_EN && opcode.flags & IndirBrnTrk)
			  return EMULATION_FAILED;
	}
Re: [PATCH v14 15/22] KVM: x86: Don't emulate instructions guarded by CET
Posted by Sean Christopherson 2 weeks, 6 days ago
On Fri, Sep 12, 2025, Sean Christopherson wrote:
> On Fri, Sep 12, 2025, Xiaoyao Li wrote:
> > On 9/11/2025 6:42 PM, Chao Gao wrote:
> > > (and thus complex). The reason is that no one had a strong opinion on whether
> > > to do the CPL check or not. I asked the same question before [*], but I don't
> > > have a strong opinion on this either.
> > 
> > I'm OK with it.
> 
> I have a strong opinion.  :-)
> 
> KVM must NOT check CPL, because inter-privilege level transfers could trigger
> CET emulation and both levels.  E.g. a FAR CALL will be affected by both shadow
> stacks and IBT at the target privilege level.
> 
> So this need more than just a changelog blurb, it needs a comment.  The code
> can also be cleaned up and optimized.  Reading CR4 and two MSRs (via indirect
> calls, i.e. potential retpolines) is wasteful for the vast majority of instructions,
> and gathering "stop emulation" into a local variable when a positive test is fatal
> is pointless.
> 
> 	/*
> 	 * Reject emulation if KVM might need to emulate shadow stack updates
> 	 * and/or indirect branch tracking enforcement, which the emulator
> 	 * doesn't support.  Deliberately don't check CPL as inter-privilege
> 	 * level transfers can trigger emulation at both privilege levels, and
> 	 * the expectation is that the guest will not require emulation of any
> 	 * CET-affected instructions at any privilege level.
> 	 */
> 	if (opcode.flags & (ShadowStack | IndirBrnTrk) &&
> 	    ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
> 		u64 u_cet, s_cet;
> 
> 		if (ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet) ||
> 		    ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))
> 			return EMULATION_FAILED;
> 
> 		if ((u_cet | s_cet) & CET_SHSTK_EN && opcode.flags & ShadowStack)
> 			  return EMULATION_FAILED;
> 
> 		if ((u_cet | s_cet) & CET_ENDBR_EN && opcode.flags & IndirBrnTrk)
> 			  return EMULATION_FAILED;
> 	}

On second thought, I think it's worth doing the CPL checks.  Explaining why KVM
doesn't bother with checking privilege level is more work than just writing the
code.

	/*
	 * Reject emulation if KVM might need to emulate shadow stack updates
	 * and/or indirect branch tracking enforcement, which the emulator
	 * doesn't support.
	 */
	if (opcode.flags & (ShadowStack | IndirBrnTrk) &&
	    ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
		u64 u_cet = 0, s_cet = 0;

		/*
		 * Check both User and Supervisor on far transfers as inter-
		 * privilege level transfers are impacted by CET at the target
		 * privilege levels, and that is not known at this time.  The
	 	 * the expectation is that the guest will not require emulation
		 * of any CET-affected instructions at any privilege level.
		 */
		if (!(opcode.flags & NearBranch)) {
			u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
		} else if (ctxt->ops->cpl(ctxt) == 3) {
			u_cet = CET_SHSTK_EN | CET_ENDBR_EN;
		} else {
			s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
		}

		if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) ||
		    (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet)))
			return EMULATION_FAILED;

		if ((u_cet | s_cet) & CET_SHSTK_EN && opcode.flags & ShadowStack)
			  return EMULATION_FAILED;

		if ((u_cet | s_cet) & CET_ENDBR_EN && opcode.flags & IndirBrnTrk)
			  return EMULATION_FAILED;
	}

Side topic, has anyone actually tested that this works?  I.e. that attempts to
emulate CET-affected instructions result in emulation failure?  I'd love to have
a selftest for this (hint, hint), but presumably writing one is non-trivial due
to the need to get the selftest compiled with the necessary annotations, setup,
and whatnot.
Re: [PATCH v14 15/22] KVM: x86: Don't emulate instructions guarded by CET
Posted by Chao Gao 2 weeks, 2 days ago
>On second thought, I think it's worth doing the CPL checks.  Explaining why KVM
>doesn't bother with checking privilege level is more work than just writing the
>code.
>
>	/*
>	 * Reject emulation if KVM might need to emulate shadow stack updates
>	 * and/or indirect branch tracking enforcement, which the emulator
>	 * doesn't support.
>	 */
>	if (opcode.flags & (ShadowStack | IndirBrnTrk) &&
>	    ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
>		u64 u_cet = 0, s_cet = 0;
>
>		/*
>		 * Check both User and Supervisor on far transfers as inter-
>		 * privilege level transfers are impacted by CET at the target
>		 * privilege levels, and that is not known at this time.  The
>	 	 * the expectation is that the guest will not require emulation
>		 * of any CET-affected instructions at any privilege level.
>		 */
>		if (!(opcode.flags & NearBranch)) {
>			u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
>		} else if (ctxt->ops->cpl(ctxt) == 3) {
>			u_cet = CET_SHSTK_EN | CET_ENDBR_EN;
>		} else {
>			s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
>		}
>
>		if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) ||
>		    (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet)))
>			return EMULATION_FAILED;
>
>		if ((u_cet | s_cet) & CET_SHSTK_EN && opcode.flags & ShadowStack)
>			  return EMULATION_FAILED;
>
>		if ((u_cet | s_cet) & CET_ENDBR_EN && opcode.flags & IndirBrnTrk)
>			  return EMULATION_FAILED;
>	}
>
>Side topic, has anyone actually tested that this works?  I.e. that attempts to
>emulate CET-affected instructions result in emulation failure?

I haven't. :(

>I'd love to have
>a selftest for this (hint, hint), but presumably writing one is non-trivial due
>to the need to get the selftest compiled with the necessary annotations, setup,
>and whatnot.

Sure. I'll try to write a selftest for this, but I'm unsure about its
complexity. Can you clarify what you mean by "necessary annotations,
setup..."? It seems to me that some simple assembly code, like
test_em_rdmsr(), should work.

For now, I plan to do a quick test by tweaking KUT's cet.c to force
emulation of CET-affected instructions.