Instead of making increasingly complicated ALTERNATIVE_n()
implementations, use a nested alternative expression.
The only difference between:
ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2)
and
ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1),
newinst2, flag2)
is that the outer alternative can add additional padding when the
inner alternative is the shorter one, which then results in
alt_instr::instrlen being inconsistent.
However, this is easily remedied since the alt_instr entries will be
consecutive and it is trivial to compute the max(alt_instr::instrlen)
at runtime while patching.
Specifically, after this patch the ALTERNATIVE_2 macro, after CPP
expansion (and manual layout), looks like this:
.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2
140:
140: \oldinstr ;
141: .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 ;
142: .pushsection .altinstructions,"a" ;
altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f ;
.popsection ; .pushsection .altinstr_replacement,"ax" ;
143: \newinstr1 ;
144: .popsection ; ;
141: .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 ;
142: .pushsection .altinstructions,"a" ;
altinstr_entry 140b,143f,\ft_flags2,142b-140b,144f-143f ;
.popsection ;
.pushsection .altinstr_replacement,"ax" ;
143: \newinstr2 ;
144: .popsection ;
.endm
The only label that is ambiguous is 140, however they all reference
the same spot, so that doesn't matter.
NOTE: obviously only @oldinstr may be an alternative; making @newinstr
an alternative would mean patching .altinstr_replacement which very
likely isn't what is intended, also the labels will be confused in
that case.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230628104952.GA2439977@hirez.programming.kicks-ass.net
---
arch/x86/include/asm/alternative.h | 206 ++++++++++---------------------------
arch/x86/kernel/alternative.c | 13 ++
tools/objtool/arch/x86/special.c | 23 ++++
tools/objtool/special.c | 16 +-
4 files changed, 100 insertions(+), 158 deletions(-)
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -150,102 +150,60 @@ static inline int alternatives_text_rese
}
#endif /* CONFIG_SMP */
-#define b_replacement(num) "664"#num
-#define e_replacement(num) "665"#num
-
-#define alt_end_marker "663"
#define alt_slen "662b-661b"
-#define alt_total_slen alt_end_marker"b-661b"
-#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
+#define alt_total_slen "663b-661b"
+#define alt_rlen "665f-664f"
-#define OLDINSTR(oldinstr, num) \
+#define OLDINSTR(oldinstr) \
"# ALT: oldnstr\n" \
"661:\n\t" oldinstr "\n662:\n" \
"# ALT: padding\n" \
- ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
- "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \
- alt_end_marker ":\n"
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
+ ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
+ "((" alt_rlen ")-(" alt_slen ")),0x90\n" \
+ "663:\n"
-/*
- * Pad the second replacement alternative with additional NOPs if it is
- * additionally longer than the first replacement alternative.
- */
-#define OLDINSTR_2(oldinstr, num1, num2) \
- "# ALT: oldinstr2\n" \
- "661:\n\t" oldinstr "\n662:\n" \
- "# ALT: padding2\n" \
- ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define OLDINSTR_3(oldinsn, n1, n2, n3) \
- "# ALT: oldinstr3\n" \
- "661:\n\t" oldinsn "\n662:\n" \
- "# ALT: padding3\n" \
- ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
- " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
- " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define ALTINSTR_ENTRY(ft_flags, num) \
+#define ALTINSTR_ENTRY(ft_flags) \
+ ".pushsection .altinstructions,\"a\"\n" \
" .long 661b - .\n" /* label */ \
- " .long " b_replacement(num)"f - .\n" /* new instruction */ \
+ " .long 664f - .\n" /* new instruction */ \
" .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
" .byte " alt_total_slen "\n" /* source len */ \
- " .byte " alt_rlen(num) "\n" /* replacement len */
-
-#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \
- "# ALT: replacement " #num "\n" \
- b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
-
-/* alternative assembly primitive: */
-#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
- OLDINSTR(oldinstr, 1) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags, 1) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr, 1) \
+ " .byte " alt_rlen "\n" /* replacement len */ \
".popsection\n"
-#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
- OLDINSTR_2(oldinstr, 1, 2) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags1, 1) \
- ALTINSTR_ENTRY(ft_flags2, 2) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr1, 1) \
- ALTINSTR_REPLACEMENT(newinstr2, 2) \
+#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ "# ALT: replacement \n" \
+ "664:\n\t" newinstr "\n 665:\n" \
".popsection\n"
+/*
+ * Define an alternative between two instructions. If @ft_flags is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ *
+ * Notably: @oldinstr may be an ALTERNATIVE() itself, also see
+ * apply_alternatives()
+ */
+#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
+ OLDINSTR(oldinstr) \
+ ALTINSTR_ENTRY(ft_flags) \
+ ALTINSTR_REPLACEMENT(newinstr)
+
+#define ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2) \
+ ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1), \
+ newinst2, flag2)
+
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
newinstr_yes, ft_flags)
-#define ALTERNATIVE_3(oldinsn, newinsn1, ft_flags1, newinsn2, ft_flags2, \
- newinsn3, ft_flags3) \
- OLDINSTR_3(oldinsn, 1, 2, 3) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags1, 1) \
- ALTINSTR_ENTRY(ft_flags2, 2) \
- ALTINSTR_ENTRY(ft_flags3, 3) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinsn1, 1) \
- ALTINSTR_REPLACEMENT(newinsn2, 2) \
- ALTINSTR_REPLACEMENT(newinsn3, 3) \
- ".popsection\n"
+#define ALTERNATIVE_3(oldinst, newinst1, flag1, newinst2, flag2, \
+ newinst3, flag3) \
+ ALTERNATIVE(ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2), \
+ newinst3, flag3)
/*
* Alternative instructions for different CPU types or capabilities.
@@ -370,6 +328,21 @@ static inline int alternatives_text_rese
.byte \alt_len
.endm
+#define __ALTERNATIVE(oldinst, newinst, flag) \
+140: \
+ oldinst ; \
+141: \
+ .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 ;\
+142: \
+ .pushsection .altinstructions,"a" ; \
+ altinstr_entry 140b,143f,flag,142b-140b,144f-143f ; \
+ .popsection ; \
+ .pushsection .altinstr_replacement,"ax" ; \
+143: \
+ newinst ; \
+144: \
+ .popsection ;
+
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
@@ -377,88 +350,23 @@ static inline int alternatives_text_rese
* in case @newinstr is longer than @oldinstr.
*/
.macro ALTERNATIVE oldinstr, newinstr, ft_flags
-140:
- \oldinstr
-141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags,142b-140b,144f-143f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr
-144:
- .popsection
+ __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags)
.endm
-#define old_len 141b-140b
-#define new_len1 144f-143f
-#define new_len2 145f-144f
-#define new_len3 146f-145f
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c))
-
-
/*
* Same as ALTERNATIVE macro above but for two alternatives. If CPU
* has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
* @feature2, it replaces @oldinstr with @feature2.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2
-140:
- \oldinstr
-141:
- .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \
- (alt_max_2(new_len1, new_len2) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f
- altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- .popsection
+ __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1),
+ \newinstr2, \ft_flags2)
.endm
.macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3
-140:
- \oldinstr
-141:
- .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \
- (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f
- altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f
- altinstr_entry 140b,145f,\ft_flags3,142b-140b,146f-145f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- \newinstr3
-146:
- .popsection
+ __ALTERNATIVE(__ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1),
+ \newinstr2, \ft_flags2),
+ \newinstr3, \ft_flags3)
.endm
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -398,7 +398,7 @@ apply_relocation(u8 *buf, size_t len, u8
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
- struct alt_instr *a;
+ struct alt_instr *a, *b;
u8 *instr, *replacement;
u8 insn_buff[MAX_PATCH_LEN];
@@ -415,6 +415,17 @@ void __init_or_module noinline apply_alt
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
+ /*
+ * In case of nested ALTERNATIVE()s the outer alternative might
+ * add more padding. To ensure consistent patching find the max
+ * padding for all alt_instr entries for this site (nested
+ * alternatives result in consecutive entries).
+ */
+ for (b = a+1; b < end && b->instr_offset == a->instr_offset; b++) {
+ u8 len = max(a->instrlen, b->instrlen);
+ a->instrlen = b->instrlen = len;
+ }
+
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->instrlen > sizeof(insn_buff));
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -9,6 +9,29 @@
void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
{
+ static struct special_alt *group, *prev;
+
+ /*
+ * Recompute orig_len for nested ALTERNATIVE()s.
+ */
+ if (group && group->orig_sec == alt->orig_sec &&
+ group->orig_off == alt->orig_off) {
+
+ struct special_alt *iter = group;
+ for (;;) {
+ unsigned int len = max(iter->orig_len, alt->orig_len);
+ iter->orig_len = alt->orig_len = len;
+
+ if (iter == prev)
+ break;
+
+ iter = list_next_entry(iter, list);
+ }
+
+ } else group = alt;
+
+ prev = alt;
+
switch (feature) {
case X86_FEATURE_SMAP:
/*
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -84,6 +84,14 @@ static int get_alt_entry(struct elf *elf
entry->new_len);
}
+ orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
+ if (!orig_reloc) {
+ WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
+ return -1;
+ }
+
+ reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off);
+
if (entry->feature) {
unsigned short feature;
@@ -94,14 +102,6 @@ static int get_alt_entry(struct elf *elf
arch_handle_alternative(feature, alt);
}
- orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
- if (!orig_reloc) {
- WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
- return -1;
- }
-
- reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off);
-
if (!entry->group || alt->new_len) {
new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new);
if (!new_reloc) {
On Mon, Aug 14, 2023 at 01:44:36PM +0200, Peter Zijlstra wrote:
> Instead of making increasingly complicated ALTERNATIVE_n()
> implementations, use a nested alternative expression.
>
> The only difference between:
>
> ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2)
>
> and
>
> ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1),
> newinst2, flag2)
Hmm, one more problem I see with this. You're handling it, it seems, but
the whole thing doesn't feel clean to me.
Here's an exemplary eval:
> #APP
> # 53 "./arch/x86/include/asm/page_64.h" 1
> # ALT: oldnstr
> 661:
> # ALT: oldnstr
> 661:
<--- X
> call clear_page_orig #
> 662:
> # ALT: padding
> .skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90
> 663:
> .pushsection .altinstructions,"a"
> .long 661b - .
> .long 664f - .
> .4byte ( 3*32+16)
> .byte 663b-661b
> .byte 665f-664f
> .popsection
> .pushsection .altinstr_replacement, "ax"
> # ALT: replacement
> 664:
> call clear_page_rep #
> 665:
> .popsection
>
> 662:
> # ALT: padding
> .skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90
> 663:
<--- Z
So here it would add the padding again, unnecessarily.
> .pushsection .altinstructions,"a"
> .long 661b - .
This refers to the 661 label, if you count backwards it would be the
second 661 label at my marker X above.
> .long 664f - .
This is the 664 label at my marker Y below.
> .4byte ( 9*32+ 9)
> .byte 663b-661b
And here's where it gets interesting. That's source length. 663
backwards label is at marker Z which includes the second padding.
So if we do a lot of padding, that might grow vmlinux. Not a big deal
but still... Have you measured how much allyesconfig builds grow with
this patch?
> .byte 665f-664f
> .popsection
> .pushsection .altinstr_replacement, "ax"
> # ALT: replacement
> 664:
<--- Y
> call clear_page_erms #
> 665:
> .popsection
In any case, I'd still like to solve this in a clean way, without the
fixup and unnecessary padding addition.
Lemme play some more with the preprocessor...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Thu, Sep 07, 2023 at 10:31:58AM +0200, Borislav Petkov wrote:
> On Mon, Aug 14, 2023 at 01:44:36PM +0200, Peter Zijlstra wrote:
> > Instead of making increasingly complicated ALTERNATIVE_n()
> > implementations, use a nested alternative expression.
> >
> > The only difference between:
> >
> > ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2)
> >
> > and
> >
> > ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1),
> > newinst2, flag2)
>
> Hmm, one more problem I see with this. You're handling it, it seems, but
> the whole thing doesn't feel clean to me.
>
> Here's an exemplary eval:
>
> > #APP
> > # 53 "./arch/x86/include/asm/page_64.h" 1
> > # ALT: oldnstr
> > 661:
> > # ALT: oldnstr
> > 661:
>
> <--- X
>
> > call clear_page_orig #
> > 662:
> > # ALT: padding
> > .skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90
665f-664f = 5 (rep)
662b-661b = 5 (orig)
5-5 > 0 = 0
so no padding
> > 663:
> > .pushsection .altinstructions,"a"
> > .long 661b - .
> > .long 664f - .
> > .4byte ( 3*32+16)
> > .byte 663b-661b
> > .byte 665f-664f
> > .popsection
> > .pushsection .altinstr_replacement, "ax"
> > # ALT: replacement
> > 664:
> > call clear_page_rep #
> > 665:
> > .popsection
> >
> > 662:
> > # ALT: padding
> > .skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90
> > 663:
>
> <--- Z
>
> So here it would add the padding again, unnecessarily.
665f-664f = 5 (erms)
662b-661b = 5 (orig + padding)
5-5 > 0 = 0
no padding, also since, as you note 661b is the first, we include all
previous padding, and the skip will only add additional padding if the
new sequence is longer still.
So, no I'm not seeing it. Doubly not with this example where all 3
variants are 5 bytes.
Notably, the following nonsense alternative with 1 2 and 3 bytes
instructions:
asm volatile (
ALTERNATIVE_2("push %rbp",
"push %r12", X86_FEATURE_ALWAYS,
"mov %rsp,%rbp", X86_FEATURE_ALWAYS));
ends up as:
0004 204: 55 push %rbp
0005 205: 90 nop
0006 206: 90 nop
If you flip the 3 and 2 byte instructions the result is the same. No
extra padding.
And no, I had not actually tested this before, because clearly this is
all obvious ;-)
Anyway, the 1,3,2 variant spelled out reads like:
#APP
# 1563 "../arch/x86/kernel/alternative.c" 1
# ALT: oldnstr
661:
# ALT: oldnstr
661:
push %rbp
662:
# ALT: padding
.skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90
# Which evaluates like:
# 665f-664f = 3
# 662b-661b = 1
# 3-1 > 0 = -1
# --1 * (3-1) = 2
#
# so two single byte nops get emitted here.
663:
.pushsection .altinstructions,"a"
.long 661b - .
.long 664f - .
.4byte ( 3*32+21)
.byte 663b-661b
.byte 665f-664f
.popsection
.pushsection .altinstr_replacement, "ax"
# ALT: replacement
664:
mov %rsp,%rbp
665:
.popsection
662:
# ALT: padding
.skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90
# And this evaluates to:
# 665f-664f = 2
# 662b-661b = 3 (because it includes the original 1 byte instruction and 2 bytes padding)
# 3-1 > 0 = 0
# 0 * (3-1) = 0
#
# so no extra padding
663:
.pushsection .altinstructions,"a"
.long 661b - .
.long 664f - .
.4byte ( 3*32+21)
.byte 663b-661b
.byte 665f-664f
.popsection
.pushsection .altinstr_replacement, "ax"
# ALT: replacement
664:
push %r12
665:
.popsection
# 0 "" 2
# ../arch/x86/kernel/alternative.c:1569: int3_selftest();
#NO_APP
On Thu, Sep 07, 2023 at 01:09:17PM +0200, Peter Zijlstra wrote: > Anyway, the 1,3,2 variant spelled out reads like: > > #APP > # 1563 "../arch/x86/kernel/alternative.c" 1 > # ALT: oldnstr > 661: > # ALT: oldnstr > 661: > push %rbp > 662: > # ALT: padding > .skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90 > > # Which evaluates like: > # 665f-664f = 3 > # 662b-661b = 1 > # 3-1 > 0 = -1 > # --1 * (3-1) = 2 > # > # so two single byte nops get emitted here. > > 663: > .pushsection .altinstructions,"a" > .long 661b - . > .long 664f - . > .4byte ( 3*32+21) > .byte 663b-661b > .byte 665f-664f > .popsection > .pushsection .altinstr_replacement, "ax" > # ALT: replacement > 664: > mov %rsp,%rbp > 665: > .popsection > > 662: > # ALT: padding > .skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90 > > # And this evaluates to: > # 665f-664f = 2 > # 662b-661b = 3 (because it includes the original 1 byte instruction and 2 bytes padding) > # 3-1 > 0 = 0 > # 0 * (3-1) = 0 copy-paste fail, that needs to read: 3-3 > 0 = 0 0 * (3-3) = 0 > # > # so no extra padding > > 663: > .pushsection .altinstructions,"a" > .long 661b - . > .long 664f - . > .4byte ( 3*32+21) > .byte 663b-661b > .byte 665f-664f > .popsection > .pushsection .altinstr_replacement, "ax" > # ALT: replacement > 664: > push %r12 > 665: > .popsection > > # 0 "" 2 > # ../arch/x86/kernel/alternative.c:1569: int3_selftest(); > #NO_APP
On Thu, Sep 07, 2023 at 01:11:00PM +0200, Peter Zijlstra wrote: > On Thu, Sep 07, 2023 at 01:09:17PM +0200, Peter Zijlstra wrote: > > > Anyway, the 1,3,2 variant spelled out reads like: > > > > #APP > > # 1563 "../arch/x86/kernel/alternative.c" 1 > > # ALT: oldnstr > > 661: > > # ALT: oldnstr > > 661: > > push %rbp > > 662: > > # ALT: padding > > .skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90 > > > > # Which evaluates like: > > # 665f-664f = 3 > > # 662b-661b = 1 > > # 3-1 > 0 = -1 > > # --1 * (3-1) = 2 > > # > > # so two single byte nops get emitted here. > > > > 663: > > .pushsection .altinstructions,"a" > > .long 661b - . > > .long 664f - . > > .4byte ( 3*32+21) > > .byte 663b-661b > > .byte 665f-664f > > .popsection > > .pushsection .altinstr_replacement, "ax" > > # ALT: replacement > > 664: > > mov %rsp,%rbp > > 665: > > .popsection > > > > 662: > > # ALT: padding > > .skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90 > > > > # And this evaluates to: > > # 665f-664f = 2 > > # 662b-661b = 3 (because it includes the original 1 byte instruction and 2 bytes padding) > > # 3-1 > 0 = 0 > > # 0 * (3-1) = 0 > > copy-paste fail, that needs to read: > > 3-3 > 0 = 0 > 0 * (3-3) = 0 I'm a moron ofcourse: 2-3 > > > # > > # so no extra padding > > > > 663: > > .pushsection .altinstructions,"a" > > .long 661b - . > > .long 664f - . > > .4byte ( 3*32+21) > > .byte 663b-661b > > .byte 665f-664f > > .popsection > > .pushsection .altinstr_replacement, "ax" > > # ALT: replacement > > 664: > > push %r12 > > 665: > > .popsection > > > > # 0 "" 2 > > # ../arch/x86/kernel/alternative.c:1569: int3_selftest(); > > #NO_APP
On Thu, Sep 07, 2023 at 01:09:17PM +0200, Peter Zijlstra wrote:
> If you flip the 3 and 2 byte instructions the result is the same. No
> extra padding.
>
> And no, I had not actually tested this before, because clearly this is
> all obvious ;-)
IKR.
So I take that extra padding thing back - we actually *must* have that
padding so that it actually works correctly. I just did a silly example
but nothing says one cannot do one like that today:
alternative_2("", "pop %%rax", X86_FEATURE_ALWAYS,
"call clear_page_orig", X86_FEATURE_ALWAYS);
An order of insns which grows in size: 0, then 1, then 5.
It turns into:
> # arch/x86/mm/init.c:163: alternative_2("", "pop %%rax", X86_FEATURE_ALWAYS,
> # 163 "arch/x86/mm/init.c" 1
> # ALT: oldnstr
> 661:
> # ALT: oldnstr
> 661:
>
> 662:
> # ALT: padding
> .skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90
IINM, this turns into:
.skip 1 * (1 - 0) = 1.
because "pop %rax" is one byte. The original insn is of size 0.
So we end up with a 0x90 here.
> 663:
> .pushsection .altinstructions,"a"
> .long 661b - .
> .long 664f - .
> .4byte ( 3*32+21)
> .byte 663b-661b
> .byte 665f-664f
> .popsection
> .pushsection .altinstr_replacement, "ax"
> # ALT: replacement
> 664:
> pop %rax
> 665:
> .popsection
>
> 662:
<--- X
> # ALT: padding
> .skip -(((665f-664f)-(662b-661b)) > 0) * ((665f-664f)-(662b-661b)),0x90
Now the second guy comes in. That turns into:
.skip 1 * (5 - 1) = 4
Because, IINM, the 662 label above is the *second* one at marker X (we
go backwards) and the 661 is the second one too.
So between those two labels you have the 0x90 - one byte padding from
the first .skip.
And now it adds 4 more bytes to accomodate the CALL.
So we need to have that padding back-to-back in case the second
replacement is longer.
Ok, I guess the only thing that's bothering me is:
> # ALT: oldnstr
> 661:
> # ALT: oldnstr
> 661:
I'll keep on playing with this.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Thu, Sep 07, 2023 at 05:06:32PM +0200, Borislav Petkov wrote:
> > # ALT: oldnstr
> > 661:
> > # ALT: oldnstr
> > 661:
>
> I'll keep on playing with this.
Ok, below's what I've been thinking. It looks ok but I'll keep staring
at it for a while to make sure I'm not missing an angle.
We simply pass a number to the ALTERNATIVE macro, starting from 0. 0 is
the innermost invocation, 1 is the outer and so on. And then the labels
are unique and the sizes are correct. And we hardcode 0 to mean the
innermost macro invocation and use that for sizing of orig instr.
But I might be missing something so lemme poke at it more. Below is
a userspace program which makes this a lot easier to experiment with:
---
#include <stdio.h>
#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)
#define alt_slen "662b-6610b"
#define alt_total_slen "663b-661b"
#define alt_rlen "665f-664f"
#define OLDINSTR(oldinstr, n) \
"# ALT: oldnstr\n" \
"661" #n ":\n\t" oldinstr "\n662:\n" \
"# ALT: padding\n" \
".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
"((" alt_rlen ")-(" alt_slen ")),0x90\n" \
"663:\n"
#define ALTINSTR_ENTRY(ft_flags) \
".pushsection .altinstructions,\"a\"\n" \
" .long 6610b - .\n" /* label */ \
" .long 664f - .\n" /* new instruction */ \
" .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
" .byte " alt_total_slen "\n" /* source len */ \
" .byte " alt_rlen "\n" /* replacement len */ \
".popsection\n"
#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \
".pushsection .altinstr_replacement, \"ax\"\n" \
"# ALT: replacement \n" \
"664:\n\t" newinstr "\n 665:\n" \
".popsection\n"
/*
* Define an alternative between two instructions. If @ft_flags is
* present, early code in apply_alternatives() replaces @oldinstr with
* @newinstr. ".skip" directive takes care of proper instruction padding
* in case @newinstr is longer than @oldinstr.
*
* Notably: @oldinstr may be an ALTERNATIVE() itself, also see
* apply_alternatives()
*/
#define __ALTERNATIVE(oldinstr, newinstr, ft_flags, n) \
OLDINSTR(oldinstr, n) \
ALTINSTR_ENTRY(ft_flags) \
ALTINSTR_REPLACEMENT(newinstr)
#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
__ALTERNATIVE(oldinstr, newinstr, ft_flags, 0)
#define ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2) \
__ALTERNATIVE(__ALTERNATIVE(oldinst, newinst1, flag1, 0), \
newinst2, flag2, 1)
#define alternative_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
asm __inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) ::: "memory")
int main(void)
{
alternative_2("", "pop %%rax", 1, "call main", 1);
return 0;
}
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Thu, Sep 07, 2023 at 05:30:36PM +0200, Borislav Petkov wrote:
> But I might be missing something so lemme poke at it more.
So my guest boots with the below diff ontop of yours. That doesn't mean
a whole lot but it looks like it DTRT. (Btw, the DPRINTK hunk is hoisted
up only for debugging - not part of the change).
And I've backed out your handling of the additional padding because, as
we've established, that's not really additional padding but the padding
which is missing when a subsequent sequence is longer.
It all ends up being a single consecutive region of padding as it should
be.
Building that says:
arch/x86/entry/entry_64.o: warning: objtool: entry_SYSCALL_64+0x91: weirdly overlapping alternative! 5 != 16
arch/x86/entry/entry_64_compat.o: warning: objtool: entry_SYSENTER_compat+0x80: weirdly overlapping alternative! 5 != 16
but that warning is bogus because the code in question is the
UNTRAIN_RET macro which has an empty orig insn, then two CALLs of size
5 and then the RESET_CALL_DEPTH sequence which is 16 bytes.
At build time it looks like this:
ffffffff81c000d1: 90 nop
ffffffff81c000d2: 90 nop
ffffffff81c000d3: 90 nop
ffffffff81c000d4: 90 nop
ffffffff81c000d5: 90 nop
ffffffff81c000d6: 90 nop
ffffffff81c000d7: 90 nop
ffffffff81c000d8: 90 nop
ffffffff81c000d9: 90 nop
ffffffff81c000da: 90 nop
ffffffff81c000db: 90 nop
ffffffff81c000dc: 90 nop
ffffffff81c000dd: 90 nop
ffffffff81c000de: 90 nop
ffffffff81c000df: 90 nop
ffffffff81c000e0: 90 nop
and those are 16 contiguous NOPs of padding.
At boot time, it does:
[ 0.679523] SMP alternatives: feat: 11*32+15, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 5), repl: (ffffffff833a362b, len: 5)
[ 0.683516] SMP alternatives: ffffffff81c000d1: [0:5) optimized NOPs: 0f 1f 44 00 00
That first one is X86_FEATURE_UNRET and the alt_instr descriptor simply
says that the replacement is 5 bytes long, which is the CALL that can
potentially be poked in. It doesn't care about the following 11 bytes of
padding because it doesn't matter - it wants 5 bytes only for the CALL.
[ 0.687514] SMP alternatives: feat: 11*32+10, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 5), repl: (ffffffff833a3630, len: 5)
[ 0.691521] SMP alternatives: ffffffff81c000d1: [0:5) optimized NOPs: 0f 1f 44 00 00
This is X86_FEATURE_ENTRY_IBPB. Same thing.
[ 0.695515] SMP alternatives: feat: 11*32+19, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 16), repl: (ffffffff833a3635, len: 16)
[ 0.699516] SMP alternatives: ffffffff81c000d1: [0:16) optimized NOPs: eb 0e cc cc cc cc cc cc cc cc cc cc cc cc cc cc
And this is X86_FEATURE_CALL_DEPTH and here the alt_instr descriptor has
replacement length of 16 and that is all still ok as it starts at the
same address and contains the first 5 bytes from the previous entries
which overlap here.
So address-wise we're good, the alt_instr patching descriptors are
correct and we should be good.
Thoughts?
---
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index df128ff49d60..de612307ed1e 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -150,13 +150,13 @@ static inline int alternatives_text_reserved(void *start, void *end)
}
#endif /* CONFIG_SMP */
-#define alt_slen "662b-661b"
-#define alt_total_slen "663b-661b"
+#define alt_slen "662b-6610b"
+#define alt_total_slen "663b-6610b"
#define alt_rlen "665f-664f"
-#define OLDINSTR(oldinstr) \
- "# ALT: oldnstr\n" \
- "661:\n\t" oldinstr "\n662:\n" \
+#define OLDINSTR(oldinstr, n) \
+ "# ALT: oldinstr\n" \
+ "661" #n ":\n\t" oldinstr "\n662:\n" \
"# ALT: padding\n" \
".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
"((" alt_rlen ")-(" alt_slen ")),0x90\n" \
@@ -164,7 +164,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define ALTINSTR_ENTRY(ft_flags) \
".pushsection .altinstructions,\"a\"\n" \
- " .long 661b - .\n" /* label */ \
+ " .long 6610b - .\n" /* label */ \
" .long 664f - .\n" /* new instruction */ \
" .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
" .byte " alt_total_slen "\n" /* source len */ \
@@ -185,15 +185,25 @@ static inline int alternatives_text_reserved(void *start, void *end)
*
* Notably: @oldinstr may be an ALTERNATIVE() itself, also see
* apply_alternatives()
+ *
+ * @n: nesting level. Because those macros are nested, in order to
+ * compute the source length and the total source length including the
+ * padding, the nesting level is used to define unique labels. The
+ * nesting level increases from the innermost macro invocation outwards,
+ * starting with 0. Thus, the correct starting label of oldinstr is
+ * 6610 which is hardcoded in the macros above.
*/
-#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
- OLDINSTR(oldinstr) \
+#define __ALTERNATIVE(oldinstr, newinstr, ft_flags, n) \
+ OLDINSTR(oldinstr, n) \
ALTINSTR_ENTRY(ft_flags) \
ALTINSTR_REPLACEMENT(newinstr)
+#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
+ __ALTERNATIVE(oldinstr, newinstr, ft_flags, 0)
+
#define ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2) \
- ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1), \
- newinst2, flag2)
+ __ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1), \
+ newinst2, flag2, 1)
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
@@ -202,8 +212,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define ALTERNATIVE_3(oldinst, newinst1, flag1, newinst2, flag2, \
newinst3, flag3) \
- ALTERNATIVE(ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2), \
- newinst3, flag3)
+ __ALTERNATIVE(ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2), \
+ newinst3, flag3, 2)
/*
* Alternative instructions for different CPU types or capabilities.
@@ -328,14 +338,18 @@ static inline int alternatives_text_reserved(void *start, void *end)
.byte \alt_len
.endm
-#define __ALTERNATIVE(oldinst, newinst, flag) \
-140: \
+/*
+ * Make sure the innermost macro invocation passes in as label "1400"
+ * as it is used for @oldinst sizing further down here.
+ */
+#define __ALTERNATIVE(oldinst, newinst, flag, label) \
+label: \
oldinst ; \
141: \
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 ;\
+ .skip -(((144f-143f)-(141b-1400b)) > 0) * ((144f-143f)-(141b-1400b)),0x90 ;\
142: \
.pushsection .altinstructions,"a" ; \
- altinstr_entry 140b,143f,flag,142b-140b,144f-143f ; \
+ altinstr_entry 1400b,143f,flag,142b-1400b,144f-143f ; \
.popsection ; \
.pushsection .altinstr_replacement,"ax" ; \
143: \
@@ -350,7 +364,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
* in case @newinstr is longer than @oldinstr.
*/
.macro ALTERNATIVE oldinstr, newinstr, ft_flags
- __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags)
+ __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags, 1400)
.endm
/*
@@ -359,14 +373,14 @@ static inline int alternatives_text_reserved(void *start, void *end)
* @feature2, it replaces @oldinstr with @feature2.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2
- __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1),
- \newinstr2, \ft_flags2)
+ __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1, 1400),
+ \newinstr2, \ft_flags2, 1401)
.endm
.macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3
- __ALTERNATIVE(__ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1),
- \newinstr2, \ft_flags2),
- \newinstr3, \ft_flags3)
+ __ALTERNATIVE(__ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1, 1400),
+ \newinstr2, \ft_flags2, 1401),
+ \newinstr3, \ft_flags3, 1402)
.endm
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index fa9eb5f1ff1e..aa0ea0317127 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -398,7 +398,7 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
- struct alt_instr *a, *b;
+ struct alt_instr *a;
u8 *instr, *replacement;
u8 insn_buff[MAX_PATCH_LEN];
@@ -415,22 +415,18 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
- /*
- * In case of nested ALTERNATIVE()s the outer alternative might
- * add more padding. To ensure consistent patching find the max
- * padding for all alt_instr entries for this site (nested
- * alternatives result in consecutive entries).
- */
- for (b = a+1; b < end && b->instr_offset == a->instr_offset; b++) {
- u8 len = max(a->instrlen, b->instrlen);
- a->instrlen = b->instrlen = len;
- }
-
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->instrlen > sizeof(insn_buff));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
+ DPRINTK(ALT, "feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
+ (a->flags & ALT_FLAG_NOT) ? "!" : "",
+ a->cpuid >> 5,
+ a->cpuid & 0x1f,
+ instr, instr, a->instrlen,
+ replacement, a->replacementlen);
+
/*
* Patch if either:
* - feature is present
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index a4ecb04d8d64..37328ffc72bb 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -109,7 +109,7 @@ static inline u64 xfeatures_mask_independent(void)
*
* We use XSAVE as a fallback.
*
- * The 661 label is defined in the ALTERNATIVE* macros as the address of the
+ * The 6610 label is defined in the ALTERNATIVE* macros as the address of the
* original instruction which gets replaced. We need to use it here as the
* address of the instruction where we might get an exception at.
*/
@@ -121,7 +121,7 @@ static inline u64 xfeatures_mask_independent(void)
"\n" \
"xor %[err], %[err]\n" \
"3:\n" \
- _ASM_EXTABLE_TYPE_REG(661b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
+ _ASM_EXTABLE_TYPE_REG(6610b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
@@ -135,7 +135,7 @@ static inline u64 xfeatures_mask_independent(void)
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
"3:\n" \
- _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \
+ _ASM_EXTABLE_TYPE(6610b, 3b, EX_TYPE_FPU_RESTORE) \
: \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 7145920a7aba..29e949579ede 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -9,29 +9,6 @@
void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
{
- static struct special_alt *group, *prev;
-
- /*
- * Recompute orig_len for nested ALTERNATIVE()s.
- */
- if (group && group->orig_sec == alt->orig_sec &&
- group->orig_off == alt->orig_off) {
-
- struct special_alt *iter = group;
- for (;;) {
- unsigned int len = max(iter->orig_len, alt->orig_len);
- iter->orig_len = alt->orig_len = len;
-
- if (iter == prev)
- break;
-
- iter = list_next_entry(iter, list);
- }
-
- } else group = alt;
-
- prev = alt;
-
switch (feature) {
case X86_FEATURE_SMAP:
/*
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Sat, Sep 09, 2023 at 09:50:09AM +0200, Borislav Petkov wrote:
> On Thu, Sep 07, 2023 at 05:30:36PM +0200, Borislav Petkov wrote:
> > But I might be missing something so lemme poke at it more.
>
> So my guest boots with the below diff ontop of yours. That doesn't mean
> a whole lot but it looks like it DTRT. (Btw, the DPRINTK hunk is hoisted
> up only for debugging - not part of the change).
>
> And I've backed out your handling of the additional padding because, as
> we've established, that's not really additional padding but the padding
> which is missing when a subsequent sequence is longer.
>
> It all ends up being a single consecutive region of padding as it should
> be.
>
> Building that says:
>
> arch/x86/entry/entry_64.o: warning: objtool: entry_SYSCALL_64+0x91: weirdly overlapping alternative! 5 != 16
> arch/x86/entry/entry_64_compat.o: warning: objtool: entry_SYSENTER_compat+0x80: weirdly overlapping alternative! 5 != 16
>
> but that warning is bogus because the code in question is the
> UNTRAIN_RET macro which has an empty orig insn, then two CALLs of size
> 5 and then the RESET_CALL_DEPTH sequence which is 16 bytes.
>
> At build time it looks like this:
>
> ffffffff81c000d1: 90 nop
> ffffffff81c000d2: 90 nop
> ffffffff81c000d3: 90 nop
> ffffffff81c000d4: 90 nop
> ffffffff81c000d5: 90 nop
> ffffffff81c000d6: 90 nop
> ffffffff81c000d7: 90 nop
> ffffffff81c000d8: 90 nop
> ffffffff81c000d9: 90 nop
> ffffffff81c000da: 90 nop
> ffffffff81c000db: 90 nop
> ffffffff81c000dc: 90 nop
> ffffffff81c000dd: 90 nop
> ffffffff81c000de: 90 nop
> ffffffff81c000df: 90 nop
> ffffffff81c000e0: 90 nop
>
> and those are 16 contiguous NOPs of padding.
>
> At boot time, it does:
>
> [ 0.679523] SMP alternatives: feat: 11*32+15, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 5), repl: (ffffffff833a362b, len: 5)
> [ 0.683516] SMP alternatives: ffffffff81c000d1: [0:5) optimized NOPs: 0f 1f 44 00 00
>
> That first one is X86_FEATURE_UNRET and the alt_instr descriptor simply
> says that the replacement is 5 bytes long, which is the CALL that can
> potentially be poked in. It doesn't care about the following 11 bytes of
> padding because it doesn't matter - it wants 5 bytes only for the CALL.
>
> [ 0.687514] SMP alternatives: feat: 11*32+10, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 5), repl: (ffffffff833a3630, len: 5)
> [ 0.691521] SMP alternatives: ffffffff81c000d1: [0:5) optimized NOPs: 0f 1f 44 00 00
>
> This is X86_FEATURE_ENTRY_IBPB. Same thing.
>
> [ 0.695515] SMP alternatives: feat: 11*32+19, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 16), repl: (ffffffff833a3635, len: 16)
> [ 0.699516] SMP alternatives: ffffffff81c000d1: [0:16) optimized NOPs: eb 0e cc cc cc cc cc cc cc cc cc cc cc cc cc cc
>
> And this is X86_FEATURE_CALL_DEPTH and here the alt_instr descriptor has
> replacement length of 16 and that is all still ok as it starts at the
> same address and contains the first 5 bytes from the previous entries
> which overlap here.
>
> So address-wise we're good, the alt_instr patching descriptors are
> correct and we should be good.
>
> Thoughts?
>
> ---
>
> @@ -415,22 +415,18 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
> for (a = start; a < end; a++) {
> int insn_buff_sz = 0;
>
> - /*
> - * In case of nested ALTERNATIVE()s the outer alternative might
> - * add more padding. To ensure consistent patching find the max
> - * padding for all alt_instr entries for this site (nested
> - * alternatives result in consecutive entries).
> - */
> - for (b = a+1; b < end && b->instr_offset == a->instr_offset; b++) {
> - u8 len = max(a->instrlen, b->instrlen);
> - a->instrlen = b->instrlen = len;
> - }
> -
> instr = (u8 *)&a->instr_offset + a->instr_offset;
> replacement = (u8 *)&a->repl_offset + a->repl_offset;
> BUG_ON(a->instrlen > sizeof(insn_buff));
> BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
>
> diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
> index 7145920a7aba..29e949579ede 100644
> --- a/tools/objtool/arch/x86/special.c
> +++ b/tools/objtool/arch/x86/special.c
> @@ -9,29 +9,6 @@
>
> void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
> {
> - static struct special_alt *group, *prev;
> -
> - /*
> - * Recompute orig_len for nested ALTERNATIVE()s.
> - */
> - if (group && group->orig_sec == alt->orig_sec &&
> - group->orig_off == alt->orig_off) {
> -
> - struct special_alt *iter = group;
> - for (;;) {
> - unsigned int len = max(iter->orig_len, alt->orig_len);
> - iter->orig_len = alt->orig_len = len;
> -
> - if (iter == prev)
> - break;
> -
> - iter = list_next_entry(iter, list);
> - }
> -
> - } else group = alt;
> -
> - prev = alt;
> -
> switch (feature) {
> case X86_FEATURE_SMAP:
> /*
Yeah, that wasn't optional.
So what you end up with is:
661:
"one byte orig insn"
"one nop because alt1 is 2 bytes"
"one nop because alt2 is 3 bytes"
right?
But your alt_instr are:
alt_instr1 = {
.instr_offset = 661b-.; /* .text location */
.repl_offset = 664f-.; /* .altinstr_replacement location */
/* .ft_flags */
.instrlen = 2;
.replacementlen = 2;
}
alt_instr2 = {
.instr_offset = 661b-.;
.repl_offset = 664f-.;
/* .ft_flags */
.instrlen = 3;
.replacementlen = 3;
}
So if you patch alt2, you will only patch 2 bytes of the original text,
even though that has 3 bytes of 'space'.
This becomes more of a problem with your example above where the
respective lengths are 0, 5, 16. In that case, when you patch 5, you'll
leave 11 single nops in there.
So what that code you deleted does is look for all alternatives that
start at the same point and computes the max replacementlen, because
that is the amount of bytes in the source text that has been reserved
for this alternative.
That is not optional.
On Sat, Sep 09, 2023 at 11:25:54AM +0200, Peter Zijlstra wrote:
> So what you end up with is:
>
> 661:
> "one byte orig insn"
> "one nop because alt1 is 2 bytes"
> "one nop because alt2 is 3 bytes"
>
> right?
Right.
> This becomes more of a problem with your example above where the
> respective lengths are 0, 5, 16. In that case, when you patch 5, you'll
> leave 11 single nops in there.
Well, I know what you mean but the code handles that gracefully and it
works. Watch this:
I made it always apply the second one and not apply the third, the
longest one:
.macro UNTRAIN_RET
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
VALIDATE_UNRET_END
ALTERNATIVE_3 "", \
CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
"call entry_ibpb", X86_FEATURE_ALWAYS, \
__stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm
So it comes in and pokes in the padding for the first one:
X86_FEATURE_UNRET
[ 0.903506] SMP alternatives: feat: 11*32+15, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 5), repl: (ffffffff833a362b, len: 5)
[ 0.911256] SMP alternatives: ffffffff81c000d1: [0:5) optimized NOPs: 0f 1f 44 00 00
Then patches in the entry_ibpb call:
[ 0.916849] SMP alternatives: feat: 3*32+21, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 5), repl: (ffffffff833a3630, len: 5)
[ 0.924844] SMP alternatives: ffffffff81c000d1: old_insn: 0f 1f 44 00 00
[ 0.928842] SMP alternatives: ffffffff833a3630: rpl_insn: e8 5b 9e 81 fe
[ 0.932849] SMP alternatives: ffffffff81c000d1: final_insn: e8 ba d3 fb ff
and now it comes to the call depth thing which is of size 16:
[ 0.936845] SMP alternatives: feat: 11*32+19, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 16), repl: (ffffffff833a3635, len: 16)
[ 0.940844] SMP alternatives: __optimize_nops: next: 5, insn len: 5
and this is why it works: __optimize_nops() is cautious enough to do
insn_is_nop() and there is the CALL insn: e8 ba d3 fb ff, it skips over
it:
[ 0.944852] SMP alternatives: __optimize_nops: next: 6, insn len: 1
next one is a NOP and it patches the rest of it. Resulting in an 11
bytes NOP:
[ 0.950758] SMP alternatives: ffffffff81c000d1: [5:16) optimized NOPs: e8 ba d3 fb ff 66 66 2e 0f 1f 84 00 00 00 00 00
So we're good here without this max(repl_len) thing even if it is the
right thing to do.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Sun, Sep 10, 2023 at 04:42:27PM +0200, Borislav Petkov wrote: > On Sat, Sep 09, 2023 at 11:25:54AM +0200, Peter Zijlstra wrote: > > So what you end up with is: > > > > 661: > > "one byte orig insn" > > "one nop because alt1 is 2 bytes" > > "one nop because alt2 is 3 bytes" > > > > right? > > Right. > > > This becomes more of a problem with your example above where the > > respective lengths are 0, 5, 16. In that case, when you patch 5, you'll > > leave 11 single nops in there. > > Well, I know what you mean but the code handles that gracefully and it > works. Watch this: Aah, because we run optimize_nops() for all alternatives, irrespective of it being selected. And thus also for the longest and then that'll fix things up. OK, let me check on objtool.
On Tue, Sep 12, 2023 at 11:27:09AM +0200, Peter Zijlstra wrote:
> Aah, because we run optimize_nops() for all alternatives, irrespective
> of it being selected.
Yeah, and I remember us talking about it the last time you did it and
how it would be a good idea to do that but be careful about it...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Sep 12, 2023 at 11:27:09AM +0200, Peter Zijlstra wrote: > On Sun, Sep 10, 2023 at 04:42:27PM +0200, Borislav Petkov wrote: > > On Sat, Sep 09, 2023 at 11:25:54AM +0200, Peter Zijlstra wrote: > > > So what you end up with is: > > > > > > 661: > > > "one byte orig insn" > > > "one nop because alt1 is 2 bytes" > > > "one nop because alt2 is 3 bytes" > > > > > > right? > > > > Right. > > > > > This becomes more of a problem with your example above where the > > > respective lengths are 0, 5, 16. In that case, when you patch 5, you'll > > > leave 11 single nops in there. > > > > Well, I know what you mean but the code handles that gracefully and it > > works. Watch this: > > Aah, because we run optimize_nops() for all alternatives, irrespective > of it being selected. And thus also for the longest and then that'll fix > things up. > > OK, let me check on objtool. OK, I think objtool really does need the hunk you took out. The problem there is that we're having to create ORC data that is valid for all possible alternatives -- there is only one ORC table (unless we go dynamically patch the ORC table too, but so far we've managed to avoid doing that). The constraint we have is that for every address the ORC data must match between the alternatives, but because x86 is a variable length instruction encoding we can (and do) play games. As long as the instruction addresses do not line up, they can have different ORC data. One place where this matters is the tail, if we consider this a string of single byte nops, that forces a bunch of ORC state to match. So what we do is that we assume the tail is a single large NOP, this way we get minimal overlap / ORC conflicts. As such, we need to know the max length when constructing the alternatives, otherwise you get short alternatives jumping to somewhere in the middle of the actual range and well, see above.
On Tue, Sep 12, 2023 at 11:44:41AM +0200, Peter Zijlstra wrote:
> OK, I think objtool really does need the hunk you took out.
>
> The problem there is that we're having to create ORC data that is valid
> for all possible alternatives -- there is only one ORC table (unless we
> go dynamically patch the ORC table too, but so far we've managed to
> avoid doing that).
>
> The constraint we have is that for every address the ORC data must match
> between the alternatives, but because x86 is a variable length
> instruction encoding we can (and do) play games. As long as the
> instruction addresses do not line up, they can have different ORC data.
>
> One place where this matters is the tail, if we consider this a string
> of single byte nops, that forces a bunch of ORC state to match. So what
> we do is that we assume the tail is a single large NOP, this way we get
> minimal overlap / ORC conflicts.
>
> As such, we need to know the max length when constructing the
> alternatives, otherwise you get short alternatives jumping to somewhere
> in the middle of the actual range and well, see above.
Lemme make sure I understand this correctly. We have a three-way
alternative in our example with the descrptors saying this:
feat: 11*32+15, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 5), repl: (ffffffff833a362b, len: 5)
feat: 3*32+21, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 5), repl: (ffffffff833a3630, len: 5)
feat: 11*32+19, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 16), repl: (ffffffff833a3635, len: 16)
i.e., the address to patch each time is ffffffff81c000d1, and the length
is different - 5, 5 and 16.
So that ORC data is tracking the starting address and the length?
I guess I don't fully understand the "middle of the actual range" thing
because you don't really have a middle - you have the starting address
and a length.
Or are you saying that the differing length would cause ORC conflicts?
In any case, I guess I could extend your commit with what we've figured
out in this thread and send a new version of what I think it should look
like and I can start testing it on my pile of hw next week, when I get
back...
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Wed, Sep 13, 2023 at 06:37:38AM +0200, Borislav Petkov wrote: > On Tue, Sep 12, 2023 at 11:44:41AM +0200, Peter Zijlstra wrote: > > OK, I think objtool really does need the hunk you took out. > > > > The problem there is that we're having to create ORC data that is valid > > for all possible alternatives -- there is only one ORC table (unless we > > go dynamically patch the ORC table too, but so far we've managed to > > avoid doing that). > > > > The constraint we have is that for every address the ORC data must match > > between the alternatives, but because x86 is a variable length > > instruction encoding we can (and do) play games. As long as the > > instruction addresses do not line up, they can have different ORC data. > > > > One place where this matters is the tail, if we consider this a string > > of single byte nops, that forces a bunch of ORC state to match. So what > > we do is that we assume the tail is a single large NOP, this way we get > > minimal overlap / ORC conflicts. > > > > As such, we need to know the max length when constructing the > > alternatives, otherwise you get short alternatives jumping to somewhere > > in the middle of the actual range and well, see above. > > Lemme make sure I understand this correctly. We have a three-way > alternative in our example with the descrptors saying this: > > feat: 11*32+15, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 5), repl: (ffffffff833a362b, len: 5) > feat: 3*32+21, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 5), repl: (ffffffff833a3630, len: 5) > feat: 11*32+19, old: (entry_SYSCALL_64_after_hwframe+0x59/0xd8 (ffffffff81c000d1) len: 16), repl: (ffffffff833a3635, len: 16) > > i.e., the address to patch each time is ffffffff81c000d1, and the length > is different - 5, 5 and 16. > > So that ORC data is tracking the starting address and the length? No, ORC data tracks the address of every instruction that can possibly exist in that range -- with the constraint that if two instructions have the same address, the ORC data must match. To reduce instruction edges in that range, we make sure the tail is a single large instruction to the end of the alternative. But since we now have variable length alternatives, we must search the max length. > I guess I don't fully understand the "middle of the actual range" thing > because you don't really have a middle - you have the starting address > and a length. The alternative in the source location is of size max-length. Because there must be room to patch in the longest alternative. If you allow short alternatives you get: CALL entry_untrain_ret nop nop nop nop nop nop nop nop nop nop nop Which is significantly different from: CALL entry_untrain_ret nop11 In that is has about 10 less ORC entries. But in order to build that nop11 we must know the max size. > Or are you saying that the differing length would cause ORC conflicts? Yes, see above, the short alternative will want to continue at +5, but we have a string of 1 byte nops there, and this will then constrain things. What objtool does/want is make then all of the same size so all the tails are a single instruction to +16 so that we can disregard what is in the actual tail. We've gone over this multiple times already, also see commit 6c480f222128. That made sure the kernel side adhered to this scheme by making the tail a single instruction irrespective of the length.
On Wed, Sep 13, 2023 at 10:46:58AM +0200, Peter Zijlstra wrote:
> We've gone over this multiple times already, also see commit
> 6c480f222128. That made sure the kernel side adhered to this scheme by
> making the tail a single instruction irrespective of the length.
Yes, sorry about that. I've been putting off digging deep into objtool
internals for a while now and there are no more excuses so lemme finally
read that whole thing and what it does in detail. And thanks for
explaining again. :-\
As to the patch at hand, how does the below look like?
Thx.
---
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 14 Aug 2023 13:44:36 +0200
Subject: [PATCH] x86/alternatives: Simplify ALTERNATIVE_n()
Instead of making increasingly complicated ALTERNATIVE_n()
implementations, use a nested alternative expression.
The only difference between:
ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2)
and
ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1),
newinst2, flag2)
is that the outer alternative can add additional padding - padding which
is needed when newinst2 is longer than oldinst and newinst1 combined
- which then results in alt_instr::instrlen being inconsistent.
However, this is easily remedied since the alt_instr entries will be
consecutive and it is trivial to compute the max(alt_instr::instrlen)
at runtime while patching.
The correct max length of all the alternative insn variants is needed
for ORC unwinding CFI tracking data to be correct. For details, see
6c480f222128 ("x86/alternative: Rewrite optimize_nops() some")
[ bp: Make labels unique and thus all sizing use unambiguous labels.
Add more info. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lkml.kernel.org/r/20230628104952.GA2439977@hirez.programming.kicks-ass.net
---
arch/x86/include/asm/alternative.h | 226 ++++++++++-------------------
arch/x86/kernel/alternative.c | 18 ++-
arch/x86/kernel/fpu/xstate.h | 6 +-
tools/objtool/arch/x86/special.c | 23 +++
tools/objtool/special.c | 16 +-
5 files changed, 125 insertions(+), 164 deletions(-)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 9c4da699e11a..bcdce6026301 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -150,102 +150,70 @@ static inline int alternatives_text_reserved(void *start, void *end)
}
#endif /* CONFIG_SMP */
-#define b_replacement(num) "664"#num
-#define e_replacement(num) "665"#num
+#define alt_slen "662b-6610b"
+#define alt_total_slen "663b-6610b"
+#define alt_rlen "665f-664f"
-#define alt_end_marker "663"
-#define alt_slen "662b-661b"
-#define alt_total_slen alt_end_marker"b-661b"
-#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
-
-#define OLDINSTR(oldinstr, num) \
- "# ALT: oldnstr\n" \
- "661:\n\t" oldinstr "\n662:\n" \
+#define OLDINSTR(oldinstr, n) \
+ "# ALT: oldinstr\n" \
+ "661" #n ":\n\t" oldinstr "\n662:\n" \
"# ALT: padding\n" \
- ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
- "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \
- alt_end_marker ":\n"
+ ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
+ "((" alt_rlen ")-(" alt_slen ")),0x90\n" \
+ "663:\n"
+
+#define ALTINSTR_ENTRY(ft_flags) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ " .long 6610b - .\n" /* label */ \
+ " .long 664f - .\n" /* new instruction */ \
+ " .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
+ " .byte " alt_total_slen "\n" /* source len */ \
+ " .byte " alt_rlen "\n" /* replacement len */ \
+ ".popsection\n"
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
+#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ "# ALT: replacement \n" \
+ "664:\n\t" newinstr "\n 665:\n" \
+ ".popsection\n"
/*
- * Pad the second replacement alternative with additional NOPs if it is
- * additionally longer than the first replacement alternative.
+ * Define an alternative between two instructions. If @ft_flags is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ *
+ * Notably: @oldinstr may be an ALTERNATIVE() itself, also see
+ * apply_alternatives()
+ *
+ * @n: nesting level. Because those macros can be nested, in order to
+ * compute the source length and the total source length including the
+ * padding, the nesting level is used to define unique labels. The
+ * nesting level increases from the innermost macro invocation outwards,
+ * starting with 0. Thus, the correct starting label of oldinstr is
+ * 6610 which is hardcoded in the macros above.
*/
-#define OLDINSTR_2(oldinstr, num1, num2) \
- "# ALT: oldinstr2\n" \
- "661:\n\t" oldinstr "\n662:\n" \
- "# ALT: padding2\n" \
- ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define OLDINSTR_3(oldinsn, n1, n2, n3) \
- "# ALT: oldinstr3\n" \
- "661:\n\t" oldinsn "\n662:\n" \
- "# ALT: padding3\n" \
- ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
- " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
- " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define ALTINSTR_ENTRY(ft_flags, num) \
- " .long 661b - .\n" /* label */ \
- " .long " b_replacement(num)"f - .\n" /* new instruction */ \
- " .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
- " .byte " alt_total_slen "\n" /* source len */ \
- " .byte " alt_rlen(num) "\n" /* replacement len */
+#define __ALTERNATIVE(oldinstr, newinstr, ft_flags, n) \
+ OLDINSTR(oldinstr, n) \
+ ALTINSTR_ENTRY(ft_flags) \
+ ALTINSTR_REPLACEMENT(newinstr)
-#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \
- "# ALT: replacement " #num "\n" \
- b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
-
-/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
- OLDINSTR(oldinstr, 1) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags, 1) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr, 1) \
- ".popsection\n"
+ __ALTERNATIVE(oldinstr, newinstr, ft_flags, 0)
-#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
- OLDINSTR_2(oldinstr, 1, 2) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags1, 1) \
- ALTINSTR_ENTRY(ft_flags2, 2) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr1, 1) \
- ALTINSTR_REPLACEMENT(newinstr2, 2) \
- ".popsection\n"
+#define ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2) \
+ __ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1), \
+ newinst2, flag2, 1)
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
newinstr_yes, ft_flags)
-#define ALTERNATIVE_3(oldinsn, newinsn1, ft_flags1, newinsn2, ft_flags2, \
- newinsn3, ft_flags3) \
- OLDINSTR_3(oldinsn, 1, 2, 3) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags1, 1) \
- ALTINSTR_ENTRY(ft_flags2, 2) \
- ALTINSTR_ENTRY(ft_flags3, 3) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinsn1, 1) \
- ALTINSTR_REPLACEMENT(newinsn2, 2) \
- ALTINSTR_REPLACEMENT(newinsn3, 3) \
- ".popsection\n"
+#define ALTERNATIVE_3(oldinst, newinst1, flag1, newinst2, flag2, \
+ newinst3, flag3) \
+ __ALTERNATIVE(ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2), \
+ newinst3, flag3, 2)
/*
* Alternative instructions for different CPU types or capabilities.
@@ -370,6 +338,25 @@ static inline int alternatives_text_reserved(void *start, void *end)
.byte \alt_len
.endm
+/*
+ * Make sure the innermost macro invocation passes in as label "1400"
+ * as it is used for @oldinst sizing further down here.
+ */
+#define __ALTERNATIVE(oldinst, newinst, flag, label) \
+label: \
+ oldinst ; \
+141: \
+ .skip -(((144f-143f)-(141b-1400b)) > 0) * ((144f-143f)-(141b-1400b)),0x90 ;\
+142: \
+ .pushsection .altinstructions,"a" ; \
+ altinstr_entry 1400b,143f,flag,142b-1400b,144f-143f ; \
+ .popsection ; \
+ .pushsection .altinstr_replacement,"ax" ; \
+143: \
+ newinst ; \
+144: \
+ .popsection ;
+
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
@@ -377,88 +364,23 @@ static inline int alternatives_text_reserved(void *start, void *end)
* in case @newinstr is longer than @oldinstr.
*/
.macro ALTERNATIVE oldinstr, newinstr, ft_flags
-140:
- \oldinstr
-141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags,142b-140b,144f-143f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr
-144:
- .popsection
+ __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags, 1400)
.endm
-#define old_len 141b-140b
-#define new_len1 144f-143f
-#define new_len2 145f-144f
-#define new_len3 146f-145f
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c))
-
-
/*
* Same as ALTERNATIVE macro above but for two alternatives. If CPU
* has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
* @feature2, it replaces @oldinstr with @feature2.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2
-140:
- \oldinstr
-141:
- .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \
- (alt_max_2(new_len1, new_len2) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f
- altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- .popsection
+ __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1, 1400),
+ \newinstr2, \ft_flags2, 1401)
.endm
.macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3
-140:
- \oldinstr
-141:
- .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \
- (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f
- altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f
- altinstr_entry 140b,145f,\ft_flags3,142b-140b,146f-145f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- \newinstr3
-146:
- .popsection
+ __ALTERNATIVE(__ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1, 1400),
+ \newinstr2, \ft_flags2, 1401),
+ \newinstr3, \ft_flags3, 1402)
.endm
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a5ead6a6d233..bcbef8ce9d94 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -398,7 +398,7 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
- struct alt_instr *a;
+ struct alt_instr *a, *b;
u8 *instr, *replacement;
u8 insn_buff[MAX_PATCH_LEN];
@@ -415,6 +415,22 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
+ /*
+ * In case of nested ALTERNATIVE()s the outer alternative might
+ * add more padding. To ensure consistent patching find the max
+ * padding for all alt_instr entries for this site (nested
+ * alternatives result in consecutive entries).
+ *
+ * Patching works even without it but ORC unwinder CFI tracking
+ * turns the last trailing NOP into a max size instruction
+ * so that edge data can be kept at a minimum, see
+ * comment over add_nop().
+ */
+ for (b = a+1; b < end && b->instr_offset == a->instr_offset; b++) {
+ u8 len = max(a->instrlen, b->instrlen);
+ a->instrlen = b->instrlen = len;
+ }
+
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->instrlen > sizeof(insn_buff));
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index a4ecb04d8d64..37328ffc72bb 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -109,7 +109,7 @@ static inline u64 xfeatures_mask_independent(void)
*
* We use XSAVE as a fallback.
*
- * The 661 label is defined in the ALTERNATIVE* macros as the address of the
+ * The 6610 label is defined in the ALTERNATIVE* macros as the address of the
* original instruction which gets replaced. We need to use it here as the
* address of the instruction where we might get an exception at.
*/
@@ -121,7 +121,7 @@ static inline u64 xfeatures_mask_independent(void)
"\n" \
"xor %[err], %[err]\n" \
"3:\n" \
- _ASM_EXTABLE_TYPE_REG(661b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
+ _ASM_EXTABLE_TYPE_REG(6610b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
@@ -135,7 +135,7 @@ static inline u64 xfeatures_mask_independent(void)
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
"3:\n" \
- _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \
+ _ASM_EXTABLE_TYPE(6610b, 3b, EX_TYPE_FPU_RESTORE) \
: \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 29e949579ede..7145920a7aba 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -9,6 +9,29 @@
void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
{
+ static struct special_alt *group, *prev;
+
+ /*
+ * Recompute orig_len for nested ALTERNATIVE()s.
+ */
+ if (group && group->orig_sec == alt->orig_sec &&
+ group->orig_off == alt->orig_off) {
+
+ struct special_alt *iter = group;
+ for (;;) {
+ unsigned int len = max(iter->orig_len, alt->orig_len);
+ iter->orig_len = alt->orig_len = len;
+
+ if (iter == prev)
+ break;
+
+ iter = list_next_entry(iter, list);
+ }
+
+ } else group = alt;
+
+ prev = alt;
+
switch (feature) {
case X86_FEATURE_SMAP:
/*
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 91b1950f5bd8..097a69db82a0 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -84,6 +84,14 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry,
entry->new_len);
}
+ orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
+ if (!orig_reloc) {
+ WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
+ return -1;
+ }
+
+ reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off);
+
if (entry->feature) {
unsigned short feature;
@@ -94,14 +102,6 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry,
arch_handle_alternative(feature, alt);
}
- orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
- if (!orig_reloc) {
- WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
- return -1;
- }
-
- reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off);
-
if (!entry->group || alt->new_len) {
new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new);
if (!new_reloc) {
--
2.23.0
---
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Wed, Sep 13, 2023 at 04:38:47PM +0200, Borislav Petkov wrote: > [ bp: Make labels unique and thus all sizing use unambiguous labels. > Add more info. ] > +#define __ALTERNATIVE(oldinstr, newinstr, ft_flags, n) \ > + OLDINSTR(oldinstr, n) \ > + ALTINSTR_ENTRY(ft_flags) \ > + ALTINSTR_REPLACEMENT(newinstr) > +#define ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2) \ > + __ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1), \ > + newinst2, flag2, 1) > +#define ALTERNATIVE_3(oldinst, newinst1, flag1, newinst2, flag2, \ > + newinst3, flag3) \ > + __ALTERNATIVE(ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2), \ > + newinst3, flag3, 2) So I see what you did with that @n argument, but urgh, do we really need this? I mean, it just makes things harder to use and it doesn't actually fix anything.. :/
On Fri, Sep 15, 2023 at 09:46:47AM +0200, Peter Zijlstra wrote: > On Wed, Sep 13, 2023 at 04:38:47PM +0200, Borislav Petkov wrote: > > > [ bp: Make labels unique and thus all sizing use unambiguous labels. > > Add more info. ] > > > +#define __ALTERNATIVE(oldinstr, newinstr, ft_flags, n) \ > > + OLDINSTR(oldinstr, n) \ > > + ALTINSTR_ENTRY(ft_flags) \ > > + ALTINSTR_REPLACEMENT(newinstr) > > > +#define ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2) \ > > + __ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1), \ > > + newinst2, flag2, 1) > > > +#define ALTERNATIVE_3(oldinst, newinst1, flag1, newinst2, flag2, \ > > + newinst3, flag3) \ > > + __ALTERNATIVE(ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2), \ > > + newinst3, flag3, 2) > > > So I see what you did with that @n argument, but urgh, do we really need > this? I mean, it just makes things harder to use and it doesn't actually > fix anything.. :/ That is, if we can magic this using __COUNTER__ without a user interface penalty, then sure. But the last time I tried that I failed utterly and ended up with labels like: .Lalt_old___COUNTER__: no matter how many layers of CPP macro eval I stuck in it. So clearly I wasn't having a good day ....
On Fri, Sep 15, 2023 at 09:51:06AM +0200, Peter Zijlstra wrote:
> > So I see what you did with that @n argument, but urgh, do we really need
> > this? I mean, it just makes things harder to use and it doesn't actually
> > fix anything.. :/
It only addresses this repeating of the 661 labels:
# 53 "./arch/x86/include/asm/page_64.h" 1
# ALT: oldnstr
661:
# ALT: oldnstr
661:
call clear_page_orig #
662:
but this is only the produced asm which no one but me and you look at so
I guess it is not worth the effort.
I still think, though, that adding the comments explaining the situation
more is worth it because we will forget.
> That is, if we can magic this using __COUNTER__ without a user interface
> penalty, then sure. But the last time I tried that I failed utterly and
> ended up with labels like:
>
> .Lalt_old___COUNTER__:
>
> no matter how many layers of CPP macro eval I stuck in it. So clearly I
> wasn't having a good day ....
Yeah, I tried it too because Matz said it should work with it but
I failed too. Reportedly, the approach should be to do that in CPP and
use CPP even for the asm macro but my CPP-fu is basic, to say the least.
I'll poke him next time we meet - I might've missed an aspect.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Wed, Sep 13, 2023 at 04:38:47PM +0200, Borislav Petkov wrote: > On Wed, Sep 13, 2023 at 10:46:58AM +0200, Peter Zijlstra wrote: > > We've gone over this multiple times already, also see commit > > 6c480f222128. That made sure the kernel side adhered to this scheme by > > making the tail a single instruction irrespective of the length. > > Yes, sorry about that. I've been putting off digging deep into objtool > internals for a while now and there are no more excuses so lemme finally > read that whole thing and what it does in detail. And thanks for > explaining again. :-\ No worries, I always seem to forget a detail or two myself :-) > As to the patch at hand, how does the below look like? Brain is fried, I'll give it a look tomorrow.
On Sat, Sep 09, 2023 at 11:25:54AM +0200, Peter Zijlstra wrote: > This becomes more of a problem with your example above where the > respective lengths are 0, 5, 16. In that case, when you patch 5, you'll > leave 11 single nops in there. > > So what that code you deleted does is look for all alternatives that > start at the same point and computes the max replacementlen, because > that is the amount of bytes in the source text that has been reserved > for this alternative. > > That is not optional. Note that the original alternatives did this with the alt_max_*() macro at build time.
On 14.08.23 г. 14:44 ч., Peter Zijlstra wrote: > Instead of making increasingly complicated ALTERNATIVE_n() > implementations, use a nested alternative expression. > > The only difference between: > > ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2) > > and > > ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1), > newinst2, flag2) > > is that the outer alternative can add additional padding when the > inner alternative is the shorter one, which then results in > alt_instr::instrlen being inconsistent. > > However, this is easily remedied since the alt_instr entries will be > consecutive and it is trivial to compute the max(alt_instr::instrlen) > at runtime while patching. > > Specifically, after this patch the ALTERNATIVE_2 macro, after CPP > expansion (and manual layout), looks like this: > > .macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2 > 140: > > 140: \oldinstr ; > 141: .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 ; > 142: .pushsection .altinstructions,"a" ; > altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f ; > .popsection ; .pushsection .altinstr_replacement,"ax" ; > 143: \newinstr1 ; > 144: .popsection ; ; > > 141: .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 ; > 142: .pushsection .altinstructions,"a" ; > altinstr_entry 140b,143f,\ft_flags2,142b-140b,144f-143f ; > .popsection ; > .pushsection .altinstr_replacement,"ax" ; > 143: \newinstr2 ; > 144: .popsection ; > .endm > > The only label that is ambiguous is 140, however they all reference > the same spot, so that doesn't matter. > > NOTE: obviously only @oldinstr may be an alternative; making @newinstr > an alternative would mean patching .altinstr_replacement which very > likely isn't what is intended, also the labels will be confused in > that case. > Reviewed-by: Nikolay Borisov <nik.borisov@suse.com> Ps. I feel very "enlightened" knowing that GAS uses -1 to represent true ...
On Tue, Aug 15, 2023 at 11:49:16PM +0300, Nikolay Borisov wrote: > > > On 14.08.23 г. 14:44 ч., Peter Zijlstra wrote: > > Instead of making increasingly complicated ALTERNATIVE_n() > > implementations, use a nested alternative expression. > > > > The only difference between: > > > > ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2) > > > > and > > > > ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1), > > newinst2, flag2) > > > > is that the outer alternative can add additional padding when the > > inner alternative is the shorter one, which then results in > > alt_instr::instrlen being inconsistent. > > > > However, this is easily remedied since the alt_instr entries will be > > consecutive and it is trivial to compute the max(alt_instr::instrlen) > > at runtime while patching. > > > > Specifically, after this patch the ALTERNATIVE_2 macro, after CPP > > expansion (and manual layout), looks like this: > > > > .macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2 > > 140: > > > > 140: \oldinstr ; > > 141: .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 ; > > 142: .pushsection .altinstructions,"a" ; > > altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f ; > > .popsection ; .pushsection .altinstr_replacement,"ax" ; > > 143: \newinstr1 ; > > 144: .popsection ; ; > > > > 141: .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 ; > > 142: .pushsection .altinstructions,"a" ; > > altinstr_entry 140b,143f,\ft_flags2,142b-140b,144f-143f ; > > .popsection ; > > .pushsection .altinstr_replacement,"ax" ; > > 143: \newinstr2 ; > > 144: .popsection ; > > .endm > > > > The only label that is ambiguous is 140, however they all reference > > the same spot, so that doesn't matter. > > > > NOTE: obviously only @oldinstr may be an alternative; making @newinstr > > an alternative would mean patching .altinstr_replacement which very > > likely isn't what is intended, also the labels will be confused in > > that case. > > > > Reviewed-by: Nikolay Borisov <nik.borisov@suse.com> > > Ps. I feel very "enlightened" knowing that GAS uses -1 to represent true ... Ah, but only sometimes ;-)
© 2016 - 2025 Red Hat, Inc.