cpufeatures.h   |  2 +
nospec-branch.h | 80 +++++++++++++++++++++++++++-----------------------------
2 files changed, 41 insertions(+), 41 deletions(-)
Replying here, because obviously there's no actual posting of this
patch... :/
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -118,13 +118,28 @@
> #endif
> .endm
>
> +.macro ISSUE_UNBALANCED_RET_GUARD
> + ANNOTATE_INTRA_FUNCTION_CALL
> + call .Lunbalanced_ret_guard_\@
> + int3
> +.Lunbalanced_ret_guard_\@:
> + add $(BITS_PER_LONG/8), %_ASM_SP
> + lfence
> +.endm
> +
> /*
> * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
> * monstrosity above, manually.
> */
> -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
> +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
> +.ifb \ftr2
> ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
> +.else
> + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
> +.endif
> __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
> +.Lunbalanced_\@:
> + ISSUE_UNBALANCED_RET_GUARD
> .Lskip_rsb_\@:
> .endm
(/me deletes all the swear words and starts over)
This must absolutely be the most horrible patch you could come up with,
no? I suppose that's the price of me taking PTO :-(
Could you please test this; I've only compiled it.
---
Subject: x86/nospec: Unwreck the RSB stuffing
Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections")
made a right mess of the RSB stuffing, rewrite the whole thing to not
suck.
Thanks to Andrew for the enlightening comment about Post-Barrier RSB
things so we can make this code less magical.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
cpufeatures.h | 2 +
nospec-branch.h | 80 +++++++++++++++++++++++++++-----------------------------
2 files changed, 41 insertions(+), 41 deletions(-)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 235dc85c91c3..1a31ae6d758b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -420,6 +420,8 @@
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_NEVER (-1) /* "" Logical complement of ALWAYS */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index e64fd20778b6..336f8e8cebf8 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -35,33 +35,44 @@
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
/*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
+ call 772f; \
+ int3; \
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
* Google experimented with loop-unrolling and this turned out to be
* the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
- mov $(nr/2), reg; \
-771: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 772f; \
-773: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 773b; \
-772: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 774f; \
-775: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 775b; \
-774: \
- add $(BITS_PER_LONG/8) * 2, sp; \
- dec reg; \
- jnz 771b; \
- /* barrier for jnz misprediction */ \
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ mov $(nr/2), reg; \
+771: \
+ __FILL_RETURN_SLOT \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
+ dec reg; \
+ jnz 771b; \
+ /* barrier for jnz misprediction */ \
+ lfence;
+
+/*
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
+ */
+#define __FILL_ONE_RETURN \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8), %_ASM_SP; \
lfence;
#ifdef __ASSEMBLY__
@@ -132,28 +143,15 @@
#endif
.endm
-.macro ISSUE_UNBALANCED_RET_GUARD
- ANNOTATE_INTRA_FUNCTION_CALL
- call .Lunbalanced_ret_guard_\@
- int3
-.Lunbalanced_ret_guard_\@:
- add $(BITS_PER_LONG/8), %_ASM_SP
- lfence
-.endm
-
/*
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
-.ifb \ftr2
- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
-.else
- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
-.endif
- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
-.Lunbalanced_\@:
- ISSUE_UNBALANCED_RET_GUARD
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=X86_FEATURE_NEVER
+ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
+ __stringify(__FILL_ONE_RETURN), \ftr2
+
.Lskip_rsb_\@:
.endm
On 8/16/22 05:28, Peter Zijlstra wrote:
>
> Could you please test this; I've only compiled it.
>
When booting I get the following BUG:

------------[ cut here ]------------
kernel BUG at arch/x86/kernel/alternative.c:290!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.0.0-rc1-00001-gb72b03c96999 #3
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1
04/01/2014
RIP: 0010:apply_alternatives+0x287/0x2d0
Code: 4c 29 f6 03 74 24 13 89 74 24 13 45 85 c0 0f 85 d2 41 e9 00 41 0f b6 02 83
e0 fd 3c e9 0f 84 46 ff ff ff e9 5e fe ff ff 0f 0b <0f> 0b f7 c6 00 ff ff ff 0f
84 68 ff ff ff 8d 71 fb c6 44 24 12 e9
RSP: 0000:ffffffff82c03d68 EFLAGS: 00010206
RAX: 0000000000000000 RBX: ffffffff83728c24 RCX: 0000000000007fff
RDX: 00000000ffffffea RSI: 0000000000000000 RDI: 000000000000ffff
RBP: ffffffff82c03d7a R08: e800000010c4c749 R09: 0001e8cc00000001
R10: 10c48348cc000000 R11: e8ae0feb75ccff49 R12: ffffffff8372fcf8
R13: 0000000000000000 R14: ffffffff81001a68 R15: 000000000000001f
FS:  0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff88813ffff000 CR3: 0000000002c0c001 CR4: 0000000000770ef0
PKRU: 55555554
Call Trace:
 <TASK>
 ? insn_get_opcode+0xef/0x1c0
 ? ct_nmi_enter+0xb3/0x180
 ? ct_nmi_exit+0xbe/0x1d0
 ? irqentry_exit+0x2d/0x40
 ? asm_common_interrupt+0x22/0x40
 alternative_instructions+0x5b/0xf5
 check_bugs+0xdaf/0xdef
 start_kernel+0x66a/0x6a2
 secondary_startup_64_no_verify+0xe0/0xeb
 </TASK>
Modules linked in:
Dumping ftrace buffer:
   (ftrace buffer empty)
---[ end trace 0000000000000000 ]---
RIP: 0010:apply_alternatives+0x287/0x2d0
Code: 4c 29 f6 03 74 24 13 89 74 24 13 45 85 c0 0f 85 d2 41 e9 00 41 0f b6 02 83
e0 fd 3c e9 0f 84 46 ff ff ff e9 5e fe ff ff 0f 0b <0f> 0b f7 c6 00 ff ff ff 0f
84 68 ff ff ff 8d 71 fb c6 44 24 12 e9
RSP: 0000:ffffffff82c03d68 EFLAGS: 00010206
RAX: 0000000000000000 RBX: ffffffff83728c24 RCX: 0000000000007fff
RDX: 00000000ffffffea RSI: 0000000000000000 RDI: 000000000000ffff
RBP: ffffffff82c03d7a R08: e800000010c4c749 R09: 0001e8cc00000001
R10: 10c48348cc000000 R11: e8ae0feb75ccff49 R12: ffffffff8372fcf8
R13: 0000000000000000 R14: ffffffff81001a68 R15: 000000000000001f
FS:  0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff88813ffff000 CR3: 0000000002c0c001 CR4: 0000000000770ef0
PKRU: 55555554
Kernel panic - not syncing: Attempted to kill the idle task!
Dumping ftrace buffer:
   (ftrace buffer empty)
---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---
On 8/16/22 10:34, Daniel Sneddon wrote:
> On 8/16/22 05:28, Peter Zijlstra wrote:
>
>>
>> Could you please test this; I've only compiled it.
>>
> When booting I get the following BUG:
>
> ------------[ cut here ]------------
> kernel BUG at arch/x86/kernel/alternative.c:290!
> invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.0.0-rc1-00001-gb72b03c96999 #3
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1
> 04/01/2014
> RIP: 0010:apply_alternatives+0x287/0x2d0
> Code: 4c 29 f6 03 74 24 13 89 74 24 13 45 85 c0 0f 85 d2 41 e9 00 41 0f b6 02 83
> e0 fd 3c e9 0f 84 46 ff ff ff e9 5e fe ff ff 0f 0b <0f> 0b f7 c6 00 ff ff ff 0f
> 84 68 ff ff ff 8d 71 fb c6 44 24 12 e9
> RSP: 0000:ffffffff82c03d68 EFLAGS: 00010206
> RAX: 0000000000000000 RBX: ffffffff83728c24 RCX: 0000000000007fff
> RDX: 00000000ffffffea RSI: 0000000000000000 RDI: 000000000000ffff
> RBP: ffffffff82c03d7a R08: e800000010c4c749 R09: 0001e8cc00000001
> R10: 10c48348cc000000 R11: e8ae0feb75ccff49 R12: ffffffff8372fcf8
> R13: 0000000000000000 R14: ffffffff81001a68 R15: 000000000000001f
> FS: 0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: ffff88813ffff000 CR3: 0000000002c0c001 CR4: 0000000000770ef0
> PKRU: 55555554
> Call Trace:
> <TASK>
> ? insn_get_opcode+0xef/0x1c0
> ? ct_nmi_enter+0xb3/0x180
> ? ct_nmi_exit+0xbe/0x1d0
> ? irqentry_exit+0x2d/0x40
> ? asm_common_interrupt+0x22/0x40
> alternative_instructions+0x5b/0xf5
> check_bugs+0xdaf/0xdef
> start_kernel+0x66a/0x6a2
> secondary_startup_64_no_verify+0xe0/0xeb
> </TASK>
> Modules linked in:
> Dumping ftrace buffer:
> (ftrace buffer empty)
> ---[ end trace 0000000000000000 ]---
> RIP: 0010:apply_alternatives+0x287/0x2d0
> Code: 4c 29 f6 03 74 24 13 89 74 24 13 45 85 c0 0f 85 d2 41 e9 00 41 0f b6 02 83
> e0 fd 3c e9 0f 84 46 ff ff ff e9 5e fe ff ff 0f 0b <0f> 0b f7 c6 00 ff ff ff 0f
> 84 68 ff ff ff 8d 71 fb c6 44 24 12 e9
> RSP: 0000:ffffffff82c03d68 EFLAGS: 00010206
> RAX: 0000000000000000 RBX: ffffffff83728c24 RCX: 0000000000007fff
> RDX: 00000000ffffffea RSI: 0000000000000000 RDI: 000000000000ffff
> RBP: ffffffff82c03d7a R08: e800000010c4c749 R09: 0001e8cc00000001
> R10: 10c48348cc000000 R11: e8ae0feb75ccff49 R12: ffffffff8372fcf8
> R13: 0000000000000000 R14: ffffffff81001a68 R15: 000000000000001f
> FS: 0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: ffff88813ffff000 CR3: 0000000002c0c001 CR4: 0000000000770ef0
> PKRU: 55555554
> Kernel panic - not syncing: Attempted to kill the idle task!
> Dumping ftrace buffer:
> (ftrace buffer empty)
> ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---
>
>
>
I can get it to work with the below changes. I had to change the -1 to 0x7FFF
because bit 15 is masked out in apply_alternatives(). Not sure if that's the
right way to fix it, but it boots and has the correct assembly code in
vmx_vmexit, so the alternative does get applied correctly there.
---
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1a31ae6d758b..c5b55c9f2849 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -420,7 +420,7 @@
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
-#define X86_FEATURE_NEVER (-1) /* "" Logical complement of ALWAYS */
+#define X86_FEATURE_NEVER (0x7FFF) /* "" Logical complement of ALWAYS */
/*
* BUG word(s)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 62f6b8b7c4a5..5c476b37b3bc 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -284,6 +284,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
/* Mask away "NOT" flag bit for feature to test. */
u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;
+ if (feature == X86_FEATURE_NEVER)
+ goto next;
+
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->instrlen > sizeof(insn_buff));
On Tue, Aug 16, 2022 at 11:04:36AM -0700, Daniel Sneddon wrote:
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 1a31ae6d758b..c5b55c9f2849 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -420,7 +420,7 @@
>  #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
>  #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
>
> -#define X86_FEATURE_NEVER (-1) /* "" Logical complement of ALWAYS */
> +#define X86_FEATURE_NEVER (0x7FFF) /* "" Logical complement of ALWAYS */

Bah, I initially spelled that: ALT_NOT(X86_FEATURE_ALWAYS), but Boris
made me do the -1 thing there. Oh well, Boris can fix that :-)
On Wed, Aug 17, 2022 at 08:55:08AM +0200, Peter Zijlstra wrote:
> On Tue, Aug 16, 2022 at 11:04:36AM -0700, Daniel Sneddon wrote:
> > diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> > index 1a31ae6d758b..c5b55c9f2849 100644
> > --- a/arch/x86/include/asm/cpufeatures.h
> > +++ b/arch/x86/include/asm/cpufeatures.h
> > @@ -420,7 +420,7 @@
> > #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
> > #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced
> > cache coherency */
> >
> > -#define X86_FEATURE_NEVER (-1) /* "" Logical complement of ALWAYS */
> > +#define X86_FEATURE_NEVER (0x7FFF) /* "" Logical complement of
> > ALWAYS */
>
>
> Bah, I initially spelled that: ALT_NOT(X86_FEATURE_ALWAYS), but Boris
> made me do the -1 thing there. Oh well, Boris can fix that :-)
Boris, how's this then? Will you cram it into x86/urgent ?
---
Subject: x86/nospec: Unwreck the RSB stuffing
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 16 Aug 2022 14:28:36 +0200
Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections")
made a right mess of the RSB stuffing, rewrite the whole thing to not
suck.
Thanks to Andrew for the enlightening comment about Post-Barrier RSB
things so we can make this code less magical.
Cc: stable@vger.kernel.org
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/include/asm/nospec-branch.h | 80 +++++++++++++++++------------------
1 file changed, 39 insertions(+), 41 deletions(-)
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -35,33 +35,44 @@
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
/*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
+ call 772f; \
+ int3; \
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
* Google experimented with loop-unrolling and this turned out to be
* the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
- mov $(nr/2), reg; \
-771: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 772f; \
-773: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 773b; \
-772: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 774f; \
-775: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 775b; \
-774: \
- add $(BITS_PER_LONG/8) * 2, sp; \
- dec reg; \
- jnz 771b; \
- /* barrier for jnz misprediction */ \
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ mov $(nr/2), reg; \
+771: \
+ __FILL_RETURN_SLOT \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
+ dec reg; \
+ jnz 771b; \
+ /* barrier for jnz misprediction */ \
+ lfence;
+
+/*
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
+ */
+#define __FILL_ONE_RETURN \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8), %_ASM_SP; \
lfence;
#ifdef __ASSEMBLY__
@@ -132,28 +143,15 @@
#endif
.endm
-.macro ISSUE_UNBALANCED_RET_GUARD
- ANNOTATE_INTRA_FUNCTION_CALL
- call .Lunbalanced_ret_guard_\@
- int3
-.Lunbalanced_ret_guard_\@:
- add $(BITS_PER_LONG/8), %_ASM_SP
- lfence
-.endm
-
/*
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
-.ifb \ftr2
- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
-.else
- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
-.endif
- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
-.Lunbalanced_\@:
- ISSUE_UNBALANCED_RET_GUARD
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
+ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
+ __stringify(__FILL_ONE_RETURN), \ftr2
+
.Lskip_rsb_\@:
.endm
On August 16, 2022 6:04:36 PM UTC, Daniel Sneddon <daniel.sneddon@linux.intel.com> wrote:
>diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
>index 62f6b8b7c4a5..5c476b37b3bc 100644
>--- a/arch/x86/kernel/alternative.c
>+++ b/arch/x86/kernel/alternative.c
>@@ -284,6 +284,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
> 	/* Mask away "NOT" flag bit for feature to test. */
> 	u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;

I guess it is time for struct altinstr.flags.

I never liked this INV mask bit...

>
>+	if (feature == X86_FEATURE_NEVER)
>+		goto next;
>+
> 	instr = (u8 *)&a->instr_offset + a->instr_offset;
> 	replacement = (u8 *)&a->repl_offset + a->repl_offset;
> 	BUG_ON(a->instrlen > sizeof(insn_buff));
>

--
Sent from a small device: formatting sux and brevity is inevitable.
On 16/08/2022 13:28, Peter Zijlstra wrote:
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index e64fd20778b6..336f8e8cebf8 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -35,33 +35,44 @@
>  #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
>
>  /*
> + * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
> + */
> +#define __FILL_RETURN_SLOT \
> +	ANNOTATE_INTRA_FUNCTION_CALL; \
> +	call 772f; \
> +	int3; \
> +772:
> +
> +/*
> + * Stuff the entire RSB.

One minor point. Stuff 32 slots.

There are Intel parts out in the world with RSBs larger than 32 entries,
and this loop does not fill the entire RSB on those.

This is why the 32-entry stuffing loop is explicitly not supported on
eIBRS hardware as a general mechanism.

~Andrew
On Tue, Aug 16, 2022 at 12:52:58PM +0000, Andrew Cooper wrote:
> One minor point. Stuff 32 slots.
>
> There are Intel parts out in the world with RSBs larger than 32 entries,
> and this loop does not fill the entire RSB on those.
>
> This is why the 32-entry stuffing loop is explicitly not supported on
> eIBRS hardware as a general mechanism.
I'm guessing there will be an Intel patch forthcoming, making that
RSB_CLEAR_LOOPS more dynamic, based on the current model.
:-)
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Aug 16, 2022 at 03:01:33PM +0200, Borislav Petkov wrote:
> On Tue, Aug 16, 2022 at 12:52:58PM +0000, Andrew Cooper wrote:
> > One minor point. Stuff 32 slots.
> >
> > There are Intel parts out in the world with RSBs larger than 32 entries,
> > and this loop does not fill the entire RSB on those.
> >
> > This is why the 32-entry stuffing loop is explicitly not supported on
> > eIBRS hardware as a general mechanism.
>
> I'm guessing there will be an Intel patch forthcoming, making that
> RSB_CLEAR_LOOPS more dynamic, based on the current model.

This is being discussed internally, but likely Enhanced IBRS parts don't
need RSB stuffing (except for the single entry stuffing for mitigating
PBRSB).
On Tue, Aug 16, 2022 at 02:28:36PM +0200, Peter Zijlstra wrote:
>
> Replying here, because obviously there's no actual posting of this
> patch... :/
{sigh} True :(
>
> > --- a/arch/x86/include/asm/nospec-branch.h
> > +++ b/arch/x86/include/asm/nospec-branch.h
> > @@ -118,13 +118,28 @@
> > #endif
> > .endm
> >
> > +.macro ISSUE_UNBALANCED_RET_GUARD
> > + ANNOTATE_INTRA_FUNCTION_CALL
> > + call .Lunbalanced_ret_guard_\@
> > + int3
> > +.Lunbalanced_ret_guard_\@:
> > + add $(BITS_PER_LONG/8), %_ASM_SP
> > + lfence
> > +.endm
> > +
> > /*
> > * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
> > * monstrosity above, manually.
> > */
> > -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
> > +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
> > +.ifb \ftr2
> > ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
> > +.else
> > + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
> > +.endif
> > __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
> > +.Lunbalanced_\@:
> > + ISSUE_UNBALANCED_RET_GUARD
> > .Lskip_rsb_\@:
> > .endm
>
> (/me deletes all the swear words and starts over)
>
> This must absolutely be the most horrible patch you could come up with,
> no? I suppose that's the price of me taking PTO :-(
>
> Could you please test this; I've only compiled it.
>
> ---
> Subject: x86/nospec: Unwreck the RSB stuffing
>
> Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections")
> made a right mess of the RSB stuffing, rewrite the whole thing to not
> suck.
>
> Thanks to Andrew for the enlightening comment about Post-Barrier RSB
> things so we can make this code less magical.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
I need an Intel person to test this as I have no idea how to do so as
this is an issue in Linus's tree.
thanks,
greg k-h
On Tue, Aug 16, 2022 at 02:33:34PM +0200, Greg Kroah-Hartman wrote:
> I need an Intel person
Daniel?
> to test this as I have no idea how to do so as this is an issue in
> Linus's tree.
If this passes - and I have my both fingers crossed - this will be an
amazing simplification.
Might wanna take it into stable too, when ready.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 8/16/22 05:36, Borislav Petkov wrote:
> On Tue, Aug 16, 2022 at 02:33:34PM +0200, Greg Kroah-Hartman wrote:
>> I need an Intel person
>
> Daniel?

I'll test this.

>
>> to test this as I have no idea how to do so as this is an issue in
>> Linus's tree.
>
> If this passes - and I have my both fingers crossed - this will be an
> amazing simplification.
>
> Might wanna take it into stable too, when ready.
>
> Thx.
>
On Tue, Aug 16, 2022 at 02:36:53PM +0200, Borislav Petkov wrote:
> On Tue, Aug 16, 2022 at 02:33:34PM +0200, Greg Kroah-Hartman wrote:
> > I need an Intel person
>
> Daniel?
>
> > to test this as I have no idea how to do so as this is an issue in
> > Linus's tree.
>
> If this passes - and I have my both fingers crossed - this will be an
> amazing simplification.
>
> Might wanna take it into stable too, when ready.

I'd be glad to :)
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 4e3aa9238277597c6c7624f302d81a7b568b6f2d
Gitweb: https://git.kernel.org/tip/4e3aa9238277597c6c7624f302d81a7b568b6f2d
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue, 16 Aug 2022 14:28:36 +02:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Fri, 19 Aug 2022 13:24:32 +02:00
x86/nospec: Unwreck the RSB stuffing
Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections")
made a right mess of the RSB stuffing, rewrite the whole thing to not
suck.
Thanks to Andrew for the enlightening comment about Post-Barrier RSB
things so we can make this code less magical.
Cc: stable@vger.kernel.org
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/YvuNdDWoUZSBjYcm@worktop.programming.kicks-ass.net
---
arch/x86/include/asm/nospec-branch.h | 80 +++++++++++++--------------
1 file changed, 39 insertions(+), 41 deletions(-)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index e64fd20..10731cc 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -35,33 +35,44 @@
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
/*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
+ call 772f; \
+ int3; \
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
* Google experimented with loop-unrolling and this turned out to be
* the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
- mov $(nr/2), reg; \
-771: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 772f; \
-773: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 773b; \
-772: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 774f; \
-775: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 775b; \
-774: \
- add $(BITS_PER_LONG/8) * 2, sp; \
- dec reg; \
- jnz 771b; \
- /* barrier for jnz misprediction */ \
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ mov $(nr/2), reg; \
+771: \
+ __FILL_RETURN_SLOT \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
+ dec reg; \
+ jnz 771b; \
+ /* barrier for jnz misprediction */ \
+ lfence;
+
+/*
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
+ */
+#define __FILL_ONE_RETURN \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8), %_ASM_SP; \
lfence;
#ifdef __ASSEMBLY__
@@ -132,28 +143,15 @@
#endif
.endm
-.macro ISSUE_UNBALANCED_RET_GUARD
- ANNOTATE_INTRA_FUNCTION_CALL
- call .Lunbalanced_ret_guard_\@
- int3
-.Lunbalanced_ret_guard_\@:
- add $(BITS_PER_LONG/8), %_ASM_SP
- lfence
-.endm
-
/*
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
-.ifb \ftr2
- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
-.else
- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
-.endif
- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
-.Lunbalanced_\@:
- ISSUE_UNBALANCED_RET_GUARD
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
+ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
+ __stringify(__FILL_ONE_RETURN), \ftr2
+
.Lskip_rsb_\@:
.endm
© 2016 - 2026 Red Hat, Inc.