[PATCH] x86: re-inline indirect CALL/JMP

Using the same thunk for heterogeneous calls (i.e. ones to functions
with entirely different parameter types) is a problem: Speculation may
e.g. result in use as pointers for arguments which are actually
integers. This is a result of target prediction information being tied
to the address of the RET instruction in each thunk (of which we've got
only 15, and of which in turn a fair share of the call sites use the
single one where %rax is holding the target address).
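
For illustration, each thunk's retpoline form ends in a single RET
which all callers routed through that thunk share (roughly what
IND_THUNK_RETPOLINE expands to; labels abbreviated):

    __x86_indirect_thunk_rax:
            call   2f
    1:      lfence                  /* speculation capture loop */
            jmp    1b
    2:      mov    %rax, (%rsp)     /* replace return address by target */
            ret                     /* the one RET all callers share */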

Except when actually using the full retpoline, arrange for alternatives
patching to put the JMP and LFENCE forms of the thunk back inline.
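
As a sketch of what the patching then yields at a former call site
(byte counts given for the %rax case):

    call __x86_indirect_thunk_rax   /* original: e8 <rel32>, 5 bytes */
    lfence; call *%rax              /* IND_THUNK_LFENCE: 0f ae e8 ff d0 */
    call *%rax                      /* IND_THUNK_JMP: ff d0 plus NOP pad */

For a tail-call JMP the 3 leftover bytes of the 5-byte E9 form are
instead filled with INT3; see the alternative.c changes below.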

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
I'm sure I didn't get the reasoning right / complete; I'd appreciate
clear enough indication of what needs adding/changing.

Some of the changes may not strictly be necessary:
- INDIRECT_BRANCH: We don't have any uses left in assembly code.
- GEN_INDIRECT_THUNK: Continuing to patch the thunks when they're not
  used is merely pointless. The change, however, is more in anticipation
  that X86_FEATURE_IND_THUNK_{LFENCE,JMP} may not be fully suitable
  names anymore when the code gets inlined (at least I probably wouldn't
  call such code a "thunk" anymore).

While perhaps not a big problem, I'd like to point out that this more
than doubles the size of the .altinstr_replacement section (with an
accordingly large increase of .altinstructions), unless we were to make
use of "x86/alternatives: allow replacement code snippets to be merged"
(which of course affects only .altinstr_replacement).

--- a/xen/Makefile
+++ b/xen/Makefile
@@ -195,7 +195,7 @@ t2 = $(call as-insn,$(CC) -I$(BASEDIR)/a
 # https://bugs.llvm.org/show_bug.cgi?id=36110
 t3 = $(call as-insn,$(CC),".macro FOO;.endm"$(close); asm volatile $(open)".macro FOO;.endm",-no-integrated-as)
 
-CLANG_FLAGS += $(call or,$(t1),$(t2),$(t3))
+CLANG_FLAGS += $(call or,$(t1),$(t2),$(t3),-DCLANG_INTEGRATED_AS)
 endif
 
 CLANG_FLAGS += -Werror=unknown-warning-option
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -240,7 +240,7 @@ $(obj)/efi/buildid.o $(obj)/efi/relocs-d
 .PHONY: include
 include: $(BASEDIR)/arch/x86/include/asm/asm-macros.h
 
-$(obj)/asm-macros.i: CFLAGS-y += -D__ASSEMBLY__ -P
+$(obj)/asm-macros.i: CFLAGS-y += -D__ASSEMBLY__ -D__ASM_MACROS__ -P
 
 $(BASEDIR)/arch/x86/include/asm/asm-macros.h: $(obj)/asm-macros.i $(src)/Makefile
 	$(call filechk,asm-macros.h)
--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -244,11 +244,19 @@ static void init_or_livepatch _apply_alt
 
             a->priv = 1;
 
-            /* Nothing useful to do? */
-            if ( toolchain_nops_are_ideal || a->pad_len <= 1 )
+            /* No padding at all? */
+            if ( !a->pad_len )
                 continue;
 
-            add_nops(buf, a->pad_len);
+            /* For a JMP to an indirect thunk, replace NOP by INT3. */
+            if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) &&
+                 a->cpuid == X86_FEATURE_IND_THUNK_JMP && orig[0] == 0xe9 )
+                memset(buf, 0xcc, a->pad_len);
+            /* Nothing useful to do? */
+            else if ( toolchain_nops_are_ideal || a->pad_len <= 1 )
+                continue;
+            else
+                add_nops(buf, a->pad_len);
             text_poke(orig + a->orig_len, buf, a->pad_len);
             continue;
         }
@@ -330,7 +338,12 @@ static void init_or_livepatch _apply_alt
 
         a->priv = 1;
 
-        add_nops(buf + a->repl_len, total_len - a->repl_len);
+        /* When re-inlining an indirect JMP, pad it by INT3, not NOPs. */
+        if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) &&
+             a->cpuid == X86_FEATURE_IND_THUNK_JMP && orig[0] == 0xe9 )
+            memset(buf + a->repl_len, 0xcc, total_len - a->repl_len);
+        else
+            add_nops(buf + a->repl_len, total_len - a->repl_len);
         text_poke(orig, buf, total_len);
     }
 
--- a/xen/arch/x86/asm-macros.c
+++ b/xen/arch/x86/asm-macros.c
@@ -1,2 +1,3 @@
 #include <asm/asm-defns.h>
 #include <asm/alternative-asm.h>
+#include <asm/spec_ctrl_asm.h>
--- a/xen/arch/x86/extable.c
+++ b/xen/arch/x86/extable.c
@@ -98,12 +98,42 @@ search_exception_table(const struct cpu_
          regs->rsp > (unsigned long)regs &&
          regs->rsp < (unsigned long)get_cpu_info() )
     {
-        unsigned long retptr = *(unsigned long *)regs->rsp;
+        unsigned long retaddr = *(unsigned long *)regs->rsp;
+        unsigned long retptr = 0;
+        unsigned int pad = 0;
 
-        region = find_text_region(retptr);
-        retptr = region && region->ex
-                 ? search_one_extable(region->ex, region->ex_end - 1, retptr)
-                 : 0;
+        region = find_text_region(retaddr);
+        while ( region && region->ex )
+        {
+            retptr = search_one_extable(region->ex, region->ex_end - 1,
+                                        retaddr + pad);
+            if ( !retptr )
+            {
+                /*
+                 * Indirect call thunk patching can lead to NOP padding between
+                 * the CALL and the recovery entry recorded in .fixup.  Skip
+                 * past such NOPs.  See asm/nops.h for the forms possible and
+                 * note that no more than 3 bytes of padding will be present.
+                 */
+                const uint8_t *ptr = (void *)retaddr + pad;
+
+                switch ( ptr[0] )
+                {
+                case 0x66:
+                case 0x90:
+                    if ( ++pad > 3 )
+                        break;
+                    continue;
+
+                case 0x0f:
+                    if ( pad || ptr[1] != 0x1f || ptr[2] != 0x00 )
+                        break;
+                    pad = 3;
+                    continue;
+                }
+            }
+            break;
+        }
         if ( retptr )
         {
             /*
--- a/xen/arch/x86/include/asm/asm-defns.h
+++ b/xen/arch/x86/include/asm/asm-defns.h
@@ -33,7 +33,13 @@
         $done = 0
         .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15
         .ifeqs "\arg", "%r\reg"
+#ifdef __ASM_MACROS__
             \insn __x86_indirect_thunk_r\reg
+#else
+            ALTERNATIVE_2 "\insn __x86_indirect_thunk_r\reg", \
+                          "lfence; \insn *\arg", X86_FEATURE_IND_THUNK_LFENCE, \
+                          "\insn *\arg", X86_FEATURE_IND_THUNK_JMP
+#endif
             $done = 1
            .exitm
         .endif
--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
@@ -20,7 +20,7 @@
 #ifndef __X86_SPEC_CTRL_ASM_H__
 #define __X86_SPEC_CTRL_ASM_H__
 
-#ifdef __ASSEMBLY__
+#if defined(__ASSEMBLY__) && !defined(__ASM_MACROS__)
 #include <asm/msr-index.h>
 #include <asm/spec_ctrl.h>
 
@@ -300,7 +300,50 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
 .L\@_skip:
 .endm
 
-#endif /* __ASSEMBLY__ */
+#elif defined(__ASM_MACROS__) && defined(CONFIG_INDIRECT_THUNK) && \
+      !defined(CLANG_INTEGRATED_AS)
+
+/*
+ * To allow re-inlining of indirect branches, override CALL and JMP insn
+ * mnemonics, to attach alternatives patching data.
+ */
+.macro call operand:vararg
+    branch$ call \operand
+.endm
+.macro callq operand:vararg
+    branch$ callq \operand
+.endm
+.macro jmp operand:vararg
+    branch$ jmp \operand
+.endm
+.macro branch$ insn:req, operand:vararg
+    .purgem \insn
+
+    $done = 0
+    .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15
+        .ifeqs "\operand", "__x86_indirect_thunk_r\reg"
+            ALTERNATIVE_2 "\insn \operand", \
+                          "lfence; \insn *%r\reg", X86_FEATURE_IND_THUNK_LFENCE, \
+                          "\insn *%r\reg", X86_FEATURE_IND_THUNK_JMP
+            $done = 1
+            .exitm
+        .endif
+        .ifeqs "\operand", "__x86_indirect_thunk_r\reg\(@plt)"
+            .error "unexpected: \insn \operand"
+            .exitm
+        .endif
+    .endr
+
+    .if !$done
+        \insn \operand
+    .endif
+
+    .macro \insn operand:vararg
+        branch$ \insn \\(operand)
+    .endm
+.endm
+
+#endif /* __ASSEMBLY__ / __ASM_MACROS__ */
 #endif /* !__X86_SPEC_CTRL_ASM_H__ */
 
 /*
--- a/xen/arch/x86/indirect-thunk.S
+++ b/xen/arch/x86/indirect-thunk.S
@@ -38,9 +38,13 @@
         .section .text.__x86_indirect_thunk_\reg, "ax", @progbits
 
 ENTRY(__x86_indirect_thunk_\reg)
+#ifdef CLANG_INTEGRATED_AS
         ALTERNATIVE_2 __stringify(IND_THUNK_RETPOLINE \reg),              \
         __stringify(IND_THUNK_LFENCE \reg), X86_FEATURE_IND_THUNK_LFENCE, \
         __stringify(IND_THUNK_JMP \reg),    X86_FEATURE_IND_THUNK_JMP
+#else
+        IND_THUNK_RETPOLINE \reg
+#endif
 
         int3 /* Halt straight-line speculation */