Due to concerns about circumvention attacks against FineIBT on 'naked'
ENDBR, add an additional caller side hash check to FineIBT. This
should make it impossible to pivot over such a 'naked' ENDBR
instruction at the cost of an additional load.
The specific pivot reported was against the SYSCALL entry site and
FRED will have all those holes fixed up.
This specific fineibt_paranoid_start[] sequence was concocted by
Scott.
Reported-by: Jennifer Miller <jmill@asu.edu>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250215210729.GA25168@noisy.programming.kicks-ass.net
---
arch/x86/include/asm/cfi.h | 8 +-
arch/x86/kernel/alternative.c | 143 ++++++++++++++++++++++++++++++++++++++++--
arch/x86/kernel/cfi.c | 4 -
3 files changed, 143 insertions(+), 12 deletions(-)
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -105,7 +105,7 @@ extern bool cfi_warn;
struct pt_regs;
#ifdef CONFIG_CFI_CLANG
-enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
+enum bug_trap_type handle_cfi_failure(int ud_type, struct pt_regs *regs);
#define __bpfcall
extern u32 cfi_bpf_hash;
extern u32 cfi_bpf_subprog_hash;
@@ -128,10 +128,10 @@ static inline int cfi_get_offset(void)
extern u32 cfi_get_func_hash(void *func);
#ifdef CONFIG_FINEIBT
-extern bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type);
+extern bool decode_fineibt_insn(int ud_type, struct pt_regs *regs, unsigned long *target, u32 *type);
#else
static inline bool
-decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+decode_fineibt_insn(int ud_type, struct pt_regs *regs, unsigned long *target, u32 *type)
{
return false;
}
@@ -139,7 +139,7 @@ decode_fineibt_insn(struct pt_regs *regs
#endif
#else
-static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
+static inline enum bug_trap_type handle_cfi_failure(int ud_type, struct pt_regs *regs)
{
return BUG_TRAP_TYPE_NONE;
}
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -741,6 +741,11 @@ void __init_or_module noinline apply_ret
op2 = insn.opcode.bytes[1];
switch (op1) {
+ case 0x70 ... 0x7f: /* Jcc.d8 */
+ /* See cfi_paranoid. */
+ WARN_ON_ONCE(cfi_mode != CFI_FINEIBT);
+ continue;
+
case CALL_INSN_OPCODE:
case JMP32_INSN_OPCODE:
break;
@@ -995,6 +1000,8 @@ u32 cfi_get_func_hash(void *func)
static bool cfi_rand __ro_after_init = true;
static u32 cfi_seed __ro_after_init;
+static bool cfi_paranoid __ro_after_init = false;
+
/*
* Re-hash the CFI hash with a boot-time seed while making sure the result is
* not a valid ENDBR instruction.
@@ -1037,6 +1044,12 @@ static __init int cfi_parse_cmdline(char
} else if (!strcmp(str, "warn")) {
pr_alert("CFI mismatch non-fatal!\n");
cfi_warn = true;
+ } else if (!strcmp(str, "paranoid")) {
+ if (cfi_mode == CFI_FINEIBT) {
+ cfi_paranoid = true;
+ } else {
+ pr_err("Ignoring paranoid; depends on fineibt.\n");
+ }
} else {
pr_err("Ignoring unknown cfi option (%s).", str);
}
@@ -1116,6 +1129,52 @@ extern u8 fineibt_caller_end[];
#define fineibt_caller_jmp (fineibt_caller_size - 2)
+/*
+ * Since FineIBT does hash validation on the callee side it is prone to
+ * circumvention attacks where a 'naked' ENDBR instruction exists that
+ * is not part of the fineibt_preamble sequence.
+ *
+ * Notably the x86 entry points must be ENDBR and equally cannot be
+ * fineibt_preamble.
+ *
+ * The fineibt_paranoid caller sequence adds additional caller side
+ * hash validation. This stops such circumvention attacks dead, but at the cost
+ * of adding a load.
+ *
+ * <fineibt_paranoid_start>:
+ * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
+ * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
+ * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11
+ * e: 75 fd jne d <fineibt_paranoid_start+0xd>
+ * 10: 41 ff d3 call *%r11
+ * 13: 90 nop
+ *
+ * Notably LEA does not modify flags and can be reordered with the CMP,
+ * avoiding a dependency. Again, using a non-taken (backwards) branch
+ * for the failure case, abusing LEA's immediate 0xf0 as LOCK prefix for the
+ * JCC.d8, causing #UD.
+ */
+asm( ".pushsection .rodata \n"
+ "fineibt_paranoid_start: \n"
+ " movl $0x12345678, %r10d \n"
+ " cmpl -9(%r11), %r10d \n"
+ " lea -0x10(%r11), %r11 \n"
+ " jne fineibt_paranoid_start+0xd \n"
+ "fineibt_paranoid_ind: \n"
+ " call *%r11 \n"
+ " nop \n"
+ "fineibt_paranoid_end: \n"
+ ".popsection \n"
+);
+
+extern u8 fineibt_paranoid_start[];
+extern u8 fineibt_paranoid_ind[];
+extern u8 fineibt_paranoid_end[];
+
+#define fineibt_paranoid_size (fineibt_paranoid_end - fineibt_paranoid_start)
+#define fineibt_paranoid_ind (fineibt_paranoid_ind - fineibt_paranoid_start)
+#define fineibt_paranoid_ud 0xd
+
static u32 decode_preamble_hash(void *addr)
{
u8 *p = addr;
@@ -1279,18 +1338,48 @@ static int cfi_rewrite_callers(s32 *star
{
s32 *s;
+ BUG_ON(fineibt_paranoid_size != 20);
+
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ struct insn insn;
+ u8 bytes[20];
u32 hash;
+ int ret;
+ u8 op;
addr -= fineibt_caller_size;
hash = decode_caller_hash(addr);
- if (hash) {
+ if (!hash)
+ continue;
+
+ if (!cfi_paranoid) {
text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
text_poke_early(addr + fineibt_caller_hash, &hash, 4);
+ /* rely on apply_retpolines() */
+ continue;
}
- /* rely on apply_retpolines() */
+
+ /* cfi_paranoid */
+ ret = insn_decode_kernel(&insn, addr + fineibt_caller_size);
+ if (WARN_ON_ONCE(ret < 0))
+ continue;
+
+ op = insn.opcode.bytes[0];
+ if (op != CALL_INSN_OPCODE && op != JMP32_INSN_OPCODE) {
+ WARN_ON_ONCE(1);
+ continue;
+ }
+
+ memcpy(bytes, fineibt_paranoid_start, fineibt_paranoid_size);
+ memcpy(bytes + fineibt_caller_hash, &hash, 4);
+
+ ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
+ if (WARN_ON_ONCE(ret != 3))
+ continue;
+
+ text_poke_early(addr, bytes, fineibt_paranoid_size);
}
return 0;
@@ -1307,8 +1396,15 @@ static void __apply_fineibt(s32 *start_r
if (cfi_mode == CFI_AUTO) {
cfi_mode = CFI_KCFI;
- if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
+ if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) {
+ /*
+ * FRED has much saner context on exception entry and
+ * is less easy to take advantage of.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ cfi_paranoid = true;
cfi_mode = CFI_FINEIBT;
+ }
}
/*
@@ -1365,8 +1461,10 @@ static void __apply_fineibt(s32 *start_r
/* now that nobody targets func()+0, remove ENDBR there */
cfi_rewrite_endbr(start_cfi, end_cfi);
- if (builtin)
- pr_info("Using FineIBT CFI\n");
+ if (builtin) {
+ pr_info("Using %sFineIBT CFI\n",
+ cfi_paranoid ? "paranoid " : "");
+ }
return;
default:
@@ -1439,7 +1537,8 @@ static void poison_cfi(void *addr)
* We check the preamble by checking for the ENDBR instruction relative to the
* 0xEA instruction.
*/
-bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+static bool decode_fineibt_preamble(int ud_type, struct pt_regs *regs,
+ unsigned long *target, u32 *type)
{
unsigned long addr = regs->ip - fineibt_preamble_ud;
u32 hash;
@@ -1464,6 +1563,38 @@ bool decode_fineibt_insn(struct pt_regs
return false;
}
+/*
+ * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[]
+ * sequence.
+ */
+static bool decode_fineibt_paranoid(int ud_type, struct pt_regs *regs,
+ unsigned long *target, u32 *type)
+{
+ unsigned long addr = regs->ip - fineibt_paranoid_ud;
+ u32 hash;
+
+ __get_kernel_nofault(&hash, addr + fineibt_caller_hash, u32, Efault);
+ *target = regs->r11 + fineibt_preamble_size;
+ *type = regs->r10;
+
+ /*
+ * Since the trapping instruction is the exact, but LOCK prefixed,
+ * Jcc.d8 that got us here, the normal fixup will work.
+ */
+ return true;
+
+Efault:
+ return false;
+}
+
+bool decode_fineibt_insn(int ud_type, struct pt_regs *regs,
+ unsigned long *target, u32 *type)
+{
+ if (ud_type == BUG_LOCK)
+ return decode_fineibt_paranoid(ud_type, regs, target, type);
+ return decode_fineibt_preamble(ud_type, regs, target, type);
+}
+
#else
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
--- a/arch/x86/kernel/cfi.c
+++ b/arch/x86/kernel/cfi.c
@@ -65,7 +65,7 @@ static bool decode_cfi_insn(struct pt_re
* Checks if a ud2 trap is because of a CFI failure, and handles the trap
* if needed. Returns a bug_trap_type value similarly to report_bug.
*/
-enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
+enum bug_trap_type handle_cfi_failure(int ud_type, struct pt_regs *regs)
{
unsigned long target, addr = regs->ip;
enum bug_trap_type btt;
@@ -82,7 +82,7 @@ enum bug_trap_type handle_cfi_failure(st
break;
case CFI_FINEIBT:
- if (!decode_fineibt_insn(regs, &target, &type))
+ if (!decode_fineibt_insn(ud_type, regs, &target, &type))
return BUG_TRAP_TYPE_NONE;
break;
On Wed, Feb 19, 2025 at 05:21:14PM +0100, Peter Zijlstra wrote: > Due to concerns about circumvention attacks against FineIBT on 'naked' > ENDBR, add an additional caller side hash check to FineIBT. This > should make it impossible to pivot over such a 'naked' ENDBR > instruction at the cost of an additional load. > > The specific pivot reported was against the SYSCALL entry site and > FRED will have all those holes fixed up. > > This specific fineibt_paranoid_start[] sequence was concocted by > Scott. > > Reported-by: Jennifer Miller <jmill@asu.edu> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> With patch 6's misplaced chunk moved, looks good: Reviewed-by: Kees Cook <kees@kernel.org> -- Kees Cook
On 19/02/2025 4:21 pm, Peter Zijlstra wrote: > --- a/arch/x86/include/asm/cfi.h > +++ b/arch/x86/include/asm/cfi.h > @@ -1116,6 +1129,52 @@ extern u8 fineibt_caller_end[]; > > #define fineibt_caller_jmp (fineibt_caller_size - 2) > > +/* > + * Since FineIBT does hash validation on the callee side it is prone to > + * circumvention attacks where a 'naked' ENDBR instruction exists that > + * is not part of the fineibt_preamble sequence. > + * > + * Notably the x86 entry points must be ENDBR and equally cannot be > + * fineibt_preamble. > + * > + * The fineibt_paranoid caller sequence adds additional caller side > + * hash validation. This stops such circumvetion attacks dead, but at the cost > + * of adding a load. > + * > + * <fineibt_paranoid_start>: > + * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d > + * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d > + * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11 > + * e: 75 fd jne d <fineibt_paranoid_start+0xd> > + * 10: 41 ff d3 call *%r11 > + * 13: 90 nop > + * > + * Notably LEA does not modify flags and can be reordered with the CMP, > + * avoiding a dependency. Again, using a non-taken (backwards) branch > + * for the failure case, abusing LEA's immediate 0xf0 as LOCK prefix for the > + * JCC.d8, causing #UD. > + */ I don't know what to say. This is equal parts horrifying and beautiful. > +asm( ".pushsection .rodata \n" > + "fineibt_paranoid_start: \n" > + " movl $0x12345678, %r10d \n" > + " cmpl -9(%r11), %r10d \n" > + " lea -0x10(%r11), %r11 \n" > + " jne fineibt_paranoid_start+0xd \n" Maybe `jne . - 3` ? Or perhaps `1: jne 1b - 1` ? Both seem marginally less fragile than tying the reference to fineibt_paranoid_start. ~Andrew
On Wed, 19 Feb 2025 17:31:39 +0000 Andrew Cooper <andrew.cooper3@citrix.com> wrote: > On 19/02/2025 4:21 pm, Peter Zijlstra wrote: > > --- a/arch/x86/include/asm/cfi.h > > +++ b/arch/x86/include/asm/cfi.h > > @@ -1116,6 +1129,52 @@ extern u8 fineibt_caller_end[]; > > > > #define fineibt_caller_jmp (fineibt_caller_size - 2) > > > > +/* > > + * Since FineIBT does hash validation on the callee side it is prone to > > + * circumvention attacks where a 'naked' ENDBR instruction exists that > > + * is not part of the fineibt_preamble sequence. > > + * > > + * Notably the x86 entry points must be ENDBR and equally cannot be > > + * fineibt_preamble. > > + * > > + * The fineibt_paranoid caller sequence adds additional caller side > > + * hash validation. This stops such circumvetion attacks dead, but at the cost > > + * of adding a load. > > + * > > + * <fineibt_paranoid_start>: > > + * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d > > + * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d > > + * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11 I think that 0x10 is the size of the cfi preamble? There should probably be at least a comment to that effect. (Maybe there is, but I'm missing the actual patch email.) > > + * e: 75 fd jne d <fineibt_paranoid_start+0xd> > > + * 10: 41 ff d3 call *%r11 > > + * 13: 90 nop > > + * > > + * Notably LEA does not modify flags and can be reordered with the CMP, > > + * avoiding a dependency. Is that even worth saying? Given that the cpu does 'register renaming' the lea might execute in the same clock as the mov. What you do get is a few clocks of stall (maybe 4 if in L1 cache, but a data read of code memory is unlikely to be there - so it'll be from the L2 cache) for the memory load. That means that the jne is speculatively executed (and I think that is separate from any prefetch speculation), I'll give it 50% taken. (Or maybe 100% if backwards branches get predicted taken. 
I don't think current Intel cpu do that - they just use whatever is in the branch prediction slot.) > > + * Again, using a non-taken (backwards) branch > > + * for the failure case, abusing LEA's immediate 0xf0 as LOCK prefix for the > > + * JCC.d8, causing #UD. > > + */ > > I don't know what to say. This is equal parts horrifying and beautiful. Agreed. Are you absolutely sure that all cpu have (and will) always #UD the unexpected LOCK prefix on a Jcc instruction? My 80386 book does say it will #UD, but I can imagine it being ignored or even repurposed. David
On Wed, Feb 19, 2025 at 05:31:39PM +0000, Andrew Cooper wrote: > On 19/02/2025 4:21 pm, Peter Zijlstra wrote: > > --- a/arch/x86/include/asm/cfi.h > > +++ b/arch/x86/include/asm/cfi.h > > @@ -1116,6 +1129,52 @@ extern u8 fineibt_caller_end[]; > > > > #define fineibt_caller_jmp (fineibt_caller_size - 2) > > > > +/* > > + * Since FineIBT does hash validation on the callee side it is prone to > > + * circumvention attacks where a 'naked' ENDBR instruction exists that > > + * is not part of the fineibt_preamble sequence. > > + * > > + * Notably the x86 entry points must be ENDBR and equally cannot be > > + * fineibt_preamble. > > + * > > + * The fineibt_paranoid caller sequence adds additional caller side > > + * hash validation. This stops such circumvetion attacks dead, but at the cost > > + * of adding a load. > > + * > > + * <fineibt_paranoid_start>: > > + * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d > > + * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d > > + * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11 > > + * e: 75 fd jne d <fineibt_paranoid_start+0xd> > > + * 10: 41 ff d3 call *%r11 > > + * 13: 90 nop > > + * > > + * Notably LEA does not modify flags and can be reordered with the CMP, > > + * avoiding a dependency. Again, using a non-taken (backwards) branch > > + * for the failure case, abusing LEA's immediate 0xf0 as LOCK prefix for the > > + * JCC.d8, causing #UD. > > + */ > > I don't know what to say. This is equal parts horrifying and beautiful. > > > +asm( ".pushsection .rodata \n" > > + "fineibt_paranoid_start: \n" > > + " movl $0x12345678, %r10d \n" > > + " cmpl -9(%r11), %r10d \n" > > + " lea -0x10(%r11), %r11 \n" > > + " jne fineibt_paranoid_start+0xd \n" > > Maybe `jne . - 3` ? > > Or perhaps `1: jne 1b - 1` ? > > Both seem marginally less fragile than tying the reference to > fineibt_paranoid_start. Right, so I initially had '. - 3' (and '. 
- 7' in fineibt_preamble_start), but I ended up going with this form because that's what you end up with if you run it through an assembler and disassembler. So I figured this form was easier to compare vs disassembly, which is what most people will see if they ever look at this. Anyway, I'm happy to go '. - 3' again if that's preferred.
© 2016 - 2025 Red Hat, Inc.