Current test_cc() uses the fastop infrastructure to test flags using
SETcc instructions. However, int3_emulate_jcc() already fully
implements the flags->CC mapping; use that instead.
Removes a pile of gnarly asm.
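
For reference, the condition-code evaluation that __emulate_cc() ends up
providing can be sketched in plain user-space C as below. This is
illustrative only and not part of the patch; the FL_* constants and the
emulate_cc() name are local to the example and simply mirror the
architectural X86_EFLAGS_* bits:

/* Illustrative only: evaluate an x86 condition code (the low nibble of a
 * Jcc/SETcc/CMOVcc opcode) against an EFLAGS value. */
#include <stdbool.h>
#include <stdio.h>

#define FL_CF 0x001UL
#define FL_PF 0x004UL
#define FL_ZF 0x040UL
#define FL_SF 0x080UL
#define FL_OF 0x800UL

static bool emulate_cc(unsigned long flags, unsigned char cc)
{
	static const unsigned long mask[6] = {
		FL_OF, FL_CF, FL_ZF, FL_CF | FL_ZF, FL_SF, FL_PF,
	};
	bool invert = cc & 1;
	bool match;

	if (cc < 0xc) {
		match = flags & mask[cc >> 1];
	} else {
		/* 'l'/'ge': SF != OF; 'le'/'g' additionally folds in ZF */
		match = !!(flags & FL_SF) ^ !!(flags & FL_OF);
		if (cc >= 0xe)
			match = match || (flags & FL_ZF);
	}
	return match ^ invert;
}

int main(void)
{
	/* cc 0x4 is 'e'/'z': true when ZF is set; cc 0x5 is its inverse */
	printf("%d %d\n", emulate_cc(FL_ZF, 0x4), emulate_cc(FL_ZF, 0x5));
	return 0;
}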
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/include/asm/text-patching.h | 20 +++++++++++++-------
arch/x86/kvm/emulate.c | 34 ++--------------------------------
2 files changed, 15 insertions(+), 39 deletions(-)
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -176,9 +176,9 @@ void int3_emulate_ret(struct pt_regs *re
}
static __always_inline
-void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+bool __emulate_cc(unsigned long flags, u8 cc)
{
- static const unsigned long jcc_mask[6] = {
+ static const unsigned long cc_mask[6] = {
[0] = X86_EFLAGS_OF,
[1] = X86_EFLAGS_CF,
[2] = X86_EFLAGS_ZF,
@@ -191,15 +191,21 @@ void int3_emulate_jcc(struct pt_regs *re
bool match;
if (cc < 0xc) {
- match = regs->flags & jcc_mask[cc >> 1];
+ match = flags & cc_mask[cc >> 1];
} else {
- match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
- ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+ match = ((flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+ ((flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
if (cc >= 0xe)
- match = match || (regs->flags & X86_EFLAGS_ZF);
+ match = match || (flags & X86_EFLAGS_ZF);
}
- if ((match && !invert) || (!match && invert))
+ return (match && !invert) || (!match && invert);
+}
+
+static __always_inline
+void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+{
+ if (__emulate_cc(regs->flags, cc))
ip += disp;
int3_emulate_jmp(regs, ip);
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -26,6 +26,7 @@
#include <asm/debugreg.h>
#include <asm/nospec-branch.h>
#include <asm/ibt.h>
+#include <asm/text-patching.h>
#include "x86.h"
#include "tss.h"
@@ -416,31 +417,6 @@ static int fastop(struct x86_emulate_ctx
ON64(FOP3E(op##q, rax, rdx, cl)) \
FOP_END
-/* Special case for SETcc - 1 instruction per cc */
-#define FOP_SETCC(op) \
- FOP_FUNC(op) \
- #op " %al \n\t" \
- FOP_RET(op)
-
-FOP_START(setcc)
-FOP_SETCC(seto)
-FOP_SETCC(setno)
-FOP_SETCC(setc)
-FOP_SETCC(setnc)
-FOP_SETCC(setz)
-FOP_SETCC(setnz)
-FOP_SETCC(setbe)
-FOP_SETCC(setnbe)
-FOP_SETCC(sets)
-FOP_SETCC(setns)
-FOP_SETCC(setp)
-FOP_SETCC(setnp)
-FOP_SETCC(setl)
-FOP_SETCC(setnl)
-FOP_SETCC(setle)
-FOP_SETCC(setnle)
-FOP_END;
-
FOP_START(salc)
FOP_FUNC(salc)
"pushf; sbb %al, %al; popf \n\t"
@@ -1064,13 +1040,7 @@ static int em_bsr_c(struct x86_emulate_c
static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
- u8 rc;
- void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);
-
- flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
- asm("push %[flags]; popf; " CALL_NOSPEC
- : "=a"(rc), ASM_CALL_CONSTRAINT : [thunk_target]"r"(fop), [flags]"r"(flags));
- return rc;
+ return __emulate_cc(flags, condition & 0xf);
}
static void fetch_register_operand(struct operand *op)

Can you use "KVM: x86" for the scope?  "x86/kvm" is used for guest changes,
i.e. for paravirt code when running as a KVM guest.

On Mon, Nov 11, 2024, Peter Zijlstra wrote:
> Current test_cc() uses the fastop infrastructure to test flags using
> SETcc instructions. However, int3_emulate_jcc() already fully
> implements the flags->CC mapping; use that instead.

I think it's worth presenting this as a revert of sorts, even though it's not
a strict revert.  KVM also emulated jcc-like operations in software prior to
commit 9ae9febae950 ("KVM: x86 emulator: covert SETCC to fastop"), i.e. the
fastop madness was introduced for performance reasons, not because writing the
code was hard.

Looking at the output of the fastop versus __emulate_cc(), the worst case cost
is two extra conditional branches.  Assuming that eliminating the test_cc()
fastop code avoids having to add another special case in objtool, I am a-ok
with the tradeoff.  Especially since emulating instructions that use test_cc()
is largely limited to older hardware running guest firmware that uses
(emulated) SMM, and maybe a few bespoke use cases.  E.g. I get literally zero
hits on test_cc() when booting a 24 vCPU VM (64-bit or 32-bit kernel) with EPT
and unrestricted guest disabled, as the OVMF build I use doesn't rely on SMM.

And FWIW, a straight revert appears to generate worse code.  I see no reason
to bring it back.

With a massaged shortlog+changelog,

Acked-by: Sean Christopherson <seanjc@google.com>

fastop:

0x0000000000042c41 <+1537>: movzbl 0x61(%rbp),%eax
0x0000000000042c45 <+1541>: mov 0x10(%rbp),%rdx
0x0000000000042c49 <+1545>: shl $0x4,%rax
0x0000000000042c4d <+1549>: and $0xf0,%eax
0x0000000000042c52 <+1554>: and $0x8d5,%edx
0x0000000000042c58 <+1560>: or $0x2,%dh
0x0000000000042c5b <+1563>: add $0x0,%rax
0x0000000000042c61 <+1569>: push %rdx
0x0000000000042c62 <+1570>: popf
0x0000000000042c63 <+1571>: call 0x42c68 <x86_emulate_insn+1576>
0x0000000000042c68 <+1576>: mov %eax,%edx
0x0000000000042c6a <+1578>: xor %eax,%eax
0x0000000000042c6c <+1580>: test %dl,%dl
0x0000000000042c6e <+1582>: jne 0x42f24 <x86_emulate_insn+2276>

__emulate_cc:

0x0000000000042b95 <+1541>: movzbl 0x61(%rbp),%eax
0x0000000000042b99 <+1545>: mov 0x10(%rbp),%rcx
0x0000000000042b9d <+1549>: and $0xf,%eax
0x0000000000042ba0 <+1552>: cmp $0xb,%al
0x0000000000042ba2 <+1554>: ja 0x42e90 <x86_emulate_insn+2304>
0x0000000000042e90 <+2304>: mov %rcx,%rdx
0x0000000000042e93 <+2307>: mov %rcx,%rsi
0x0000000000042e96 <+2310>: shr $0x7,%rdx
0x0000000000042e9a <+2314>: shr $0xb,%rsi
0x0000000000042e9e <+2318>: xor %rsi,%rdx
0x0000000000042ea1 <+2321>: cmp $0xd,%al
0x0000000000042ea3 <+2323>: ja 0x4339a <x86_emulate_insn+3594>
0x000000000004339a <+3594>: and $0x1,%edx
0x000000000004339d <+3597>: and $0x40,%ecx
0x00000000000433a0 <+3600>: or %rcx,%rdx
0x00000000000433a3 <+3603>: setne %dl
0x00000000000433a6 <+3606>: jmp 0x42bba <x86_emulate_insn+1578>
0x0000000000042ea9 <+2329>: and $0x1,%edx
0x0000000000042eac <+2332>: jmp 0x42bba <x86_emulate_insn+1578>
0x0000000000042ba8 <+1560>: mov %eax,%edx
0x0000000000042baa <+1562>: shr %dl
0x0000000000042bac <+1564>: and $0x7,%edx
0x0000000000042baf <+1567>: and 0x0(,%rdx,8),%rcx
0x0000000000042bb7 <+1575>: setne %dl
0x0000000000042bba <+1578>: test $0x1,%al
0x0000000000042bbc <+1580>: jne 0x43323 <x86_emulate_insn+3475>
0x0000000000043323 <+3475>: test %dl,%dl
0x0000000000043325 <+3477>: jne 0x4332f <x86_emulate_insn+3487>
0x0000000000043327 <+3479>: test $0x1,%al
0x0000000000043329 <+3481>: jne 0x42bca <x86_emulate_insn+1594>
0x000000000004332f <+3487>: xor %eax,%eax
0x0000000000043331 <+3489>: jmp 0x42be0 <x86_emulate_insn+1616>
0x0000000000042bc2 <+1586>: test %dl,%dl
0x0000000000042bc4 <+1588>: je 0x43327 <x86_emulate_insn+3479>
0x0000000000043327 <+3479>: test $0x1,%al
0x0000000000043329 <+3481>: jne 0x42bca <x86_emulate_insn+1594>
0x000000000004332f <+3487>: xor %eax,%eax
0x0000000000043331 <+3489>: jmp 0x42be0 <x86_emulate_insn+1616>
0x0000000000042bca <+1594>: movslq 0xd0(%rbp),%rsi
0x0000000000042bd1 <+1601>: mov %rbp,%rdi
0x0000000000042bd4 <+1604>: add 0x90(%rbp),%rsi
0x0000000000042bdb <+1611>: call 0x3a7c0 <assign_eip>

revert:

0x0000000000042bc9 <+1545>: movzbl 0x61(%rbp),%esi
0x0000000000042bcd <+1549>: mov 0x10(%rbp),%rdx
0x0000000000042bd1 <+1553>: mov %esi,%eax
0x0000000000042bd3 <+1555>: shr %eax
0x0000000000042bd5 <+1557>: and $0x7,%eax
0x0000000000042bd8 <+1560>: cmp $0x4,%eax
0x0000000000042bdb <+1563>: je 0x42ed4 <x86_emulate_insn+2324>
0x0000000000042ed4 <+2324>: mov %edx,%ecx
0x0000000000042ed6 <+2326>: and $0x80,%ecx
0x0000000000042edc <+2332>: jmp 0x42bff <x86_emulate_insn+1599>
0x0000000000042be1 <+1569>: ja 0x43225 <x86_emulate_insn+3173>
0x0000000000043225 <+3173>: cmp $0x6,%eax
0x0000000000043228 <+3176>: je 0x434ec <x86_emulate_insn+3884>
0x00000000000434ec <+3884>: xor %edi,%edi
0x00000000000434ee <+3886>: jmp 0x43238 <x86_emulate_insn+3192>
0x000000000004322e <+3182>: mov %edx,%edi
0x0000000000043230 <+3184>: and $0x40,%edi
0x0000000000043233 <+3187>: cmp $0x7,%eax
0x0000000000043236 <+3190>: jne 0x4325c <x86_emulate_insn+3228>
0x000000000004325c <+3228>: mov %edx,%ecx
0x000000000004325e <+3230>: and $0x4,%ecx
0x0000000000043261 <+3233>: cmp $0x5,%eax
0x0000000000043264 <+3236>: je 0x42bff <x86_emulate_insn+1599>
0x000000000004326a <+3242>: jmp 0x43218 <x86_emulate_insn+3160>
0x0000000000043238 <+3192>: mov %rdx,%rcx
0x000000000004323b <+3195>: shr $0xb,%rdx
0x000000000004323f <+3199>: shr $0x7,%rcx
0x0000000000043243 <+3203>: xor %edx,%ecx
0x0000000000043245 <+3205>: and $0x1,%ecx
0x0000000000043248 <+3208>: or %edi,%ecx
0x000000000004324a <+3210>: jmp 0x42bff <x86_emulate_insn+1599>
0x0000000000042be7 <+1575>: mov %edx,%ecx
0x0000000000042be9 <+1577>: and $0x40,%ecx
0x0000000000042bec <+1580>: cmp $0x2,%eax
0x0000000000042bef <+1583>: je 0x42bff <x86_emulate_insn+1599>
0x0000000000042bf1 <+1585>: mov %edx,%ecx
0x0000000000042bf3 <+1587>: and $0x41,%ecx
0x0000000000042bf6 <+1590>: cmp $0x3,%eax
0x0000000000042bf9 <+1593>: jne 0x4320a <x86_emulate_insn+3146>
0x000000000004320a <+3146>: mov %edx,%ecx
0x000000000004320c <+3148>: and $0x1,%ecx
0x000000000004320f <+3151>: cmp $0x1,%eax
0x0000000000043212 <+3154>: je 0x42bff <x86_emulate_insn+1599>
0x0000000000043218 <+3160>: mov %edx,%ecx
0x000000000004321a <+3162>: and $0x800,%ecx
0x0000000000043220 <+3168>: jmp 0x42bff <x86_emulate_insn+1599>
0x0000000000042bff <+1599>: test %ecx,%ecx
0x0000000000042c01 <+1601>: setne %dl
0x0000000000042c04 <+1604>: and $0x1,%esi
0x0000000000042c07 <+1607>: xor %eax,%eax
0x0000000000042c09 <+1609>: cmp %sil,%dl
0x0000000000042c0c <+1612>: je 0x42c24 <x86_emulate_insn+1636>
0x0000000000042c0e <+1614>: movslq 0xd0(%rbp),%rsi
0x0000000000042c15 <+1621>: mov %rbp,%rdi
0x0000000000042c18 <+1624>: add 0x90(%rbp),%rsi
0x0000000000042c1f <+1631>: call 0x3a7c0 <assign_eip>
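
Tangentially, the flags->CC mapping is easy to sanity check from user space
against the CPU itself, using the same push/popf idiom the removed fastop
code relied on to load the flags. A minimal sketch (not from this thread;
assumes x86-64 Linux user space and gcc/clang), checking the 'be' condition:

/* Sanity-check sketch: compare a software evaluation of the "be" condition
 * (CF or ZF set) against the CPU's own SETBE. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define FL_CF 0x001UL
#define FL_PF 0x004UL
#define FL_AF 0x010UL
#define FL_ZF 0x040UL
#define FL_SF 0x080UL
#define FL_OF 0x800UL
#define FL_ARITH (FL_CF | FL_PF | FL_AF | FL_ZF | FL_SF | FL_OF) /* 0x8d5 */

static bool sw_setbe(unsigned long flags)
{
	return flags & (FL_CF | FL_ZF);
}

static bool hw_setbe(unsigned long flags)
{
	bool rc;

	/* load the arithmetic flags via push/popf, then let SETBE decide */
	asm volatile("push %1; popf; setbe %0"
		     : "=r" (rc) : "r" (flags) : "cc");
	return rc;
}

int main(void)
{
	for (unsigned long raw = 0; raw < 0x1000; raw++) {
		/* only ever feed arithmetic flags to popf */
		unsigned long f = raw & FL_ARITH;

		assert(sw_setbe(f) == hw_setbe(f));
	}
	printf("ok\n");
	return 0;
}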