Improve the readability and compactness of alternatives code.
---------------------
ALTERNATIVE() before:
---------------------
# ALT: oldinstr
771:
rep movsb
772:
# ALT: padding
.skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90
773:
.pushsection .altinstructions,"a"
.long 771b - .
.long 774f - .
.4byte (((1 << 0) << 16) | ((18*32+ 4)))
.byte 773b-771b
.byte 775f-774f
.popsection
.pushsection .altinstr_replacement, "ax"
# ALT: replacement
774:
call rep_movs_alternative
775:
.popsection
--------------------
ALTERNATIVE() after:
--------------------
# <ALTERNATIVE>
771: rep movsb
772: .skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90
773:
# ALT ENTRY:
.pushsection .altinstructions,"a"; .long 771b - .; .long 774f - .; .4byte (((1 << 0) << 16) | ((18*32+ 4))); .byte 773b-771b; .byte 775f-774f; .popsection
# ALT REPLACEMENT:
.pushsection .altinstr_replacement, "ax"
774: call rep_movs_alternative
775:
.popsection
# </ALTERNATIVE>
-----------------------
ALTERNATIVE_2() before:
-----------------------
# ALT: oldinstr
771:
# ALT: oldinstr
771:
jmp 6f
772:
# ALT: padding
.skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90
773:
.pushsection .altinstructions,"a"
.long 771b - .
.long 774f - .
.4byte ( 3*32+21)
.byte 773b-771b
.byte 775f-774f
.popsection
.pushsection .altinstr_replacement, "ax"
# ALT: replacement
774:
jmp .L4 #
775:
.popsection
772:
# ALT: padding
.skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90
773:
.pushsection .altinstructions,"a"
.long 771b - .
.long 774f - .
.4byte 297 #
.byte 773b-771b
.byte 775f-774f
.popsection
.pushsection .altinstr_replacement, "ax"
# ALT: replacement
774:
775:
.popsection
----------------------
ALTERNATIVE_2() after:
----------------------
# <ALTERNATIVE_2>
771:
771: jmp 6f
772: .skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90
773:
# ALT ENTRY:
.pushsection .altinstructions,"a"; .long 771b - .; .long 774f - .; .4byte ( 3*32+21); .byte 773b-771b; .byte 775f-774f; .popsection
# ALT REPLACEMENT:
.pushsection .altinstr_replacement, "ax"
774: jmp .L4 #
775:
.popsection
772: .skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90
773:
# ALT ENTRY:
.pushsection .altinstructions,"a"; .long 771b - .; .long 774f - .; .4byte 297; .byte 773b-771b; .byte 775f-774f; .popsection #
# ALT REPLACEMENT:
.pushsection .altinstr_replacement, "ax"
774:
775:
.popsection
# </ALTERNATIVE_2>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
arch/x86/include/asm/alternative.h | 88 +++++++++++++++++++++---------
1 file changed, 63 insertions(+), 25 deletions(-)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4a37a8bd87fd..6472d53625dc 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -151,46 +151,84 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alt_rlen "775f-774f"
#define OLDINSTR(oldinstr) \
- "# ALT: oldinstr\n" \
- "771:\n\t" oldinstr "\n772:\n" \
- "# ALT: padding\n" \
- ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
- "((" alt_rlen ")-(" alt_slen ")),0x90\n" \
+ "771:" oldinstr "\n" \
+ "772:\t.skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
+ "((" alt_rlen ")-(" alt_slen ")),0x90\n" \
"773:\n"
-#define ALTINSTR_ENTRY(ft_flags) \
- ".pushsection .altinstructions,\"a\"\n" \
- " .long 771b - .\n" /* label */ \
- " .long 774f - .\n" /* new instruction */ \
- " .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
- " .byte " alt_total_slen "\n" /* source len */ \
- " .byte " alt_rlen "\n" /* replacement len */ \
+#define ALTINSTR_ENTRY(ft_flags) \
+ "# ALT ENTRY:\n" \
+ ".pushsection .altinstructions,\"a\"; " \
+ " .long 771b - .; " /* label */ \
+ " .long 774f - .; " /* new instruction */ \
+ " .4byte " __stringify(ft_flags) "; " /* feature + flags */ \
+ " .byte " alt_total_slen "; " /* source len */ \
+ " .byte " alt_rlen "; " /* replacement len */ \
".popsection\n"
-#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \
+#define ALTINSTR_REPLACEMENT(newinstr) \
+ "# ALT REPLACEMENT:\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
- "# ALT: replacement\n" \
- "774:\n\t" newinstr "\n775:\n" \
- ".popsection\n"
+ "774:\t" newinstr "\n" \
+ "775:\n" \
+ ".popsection"
-/* alternative assembly primitive: */
-#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
+
+#define __ALTERNATIVE(oldinstr, newinstr, ft_flags) \
OLDINSTR(oldinstr) \
ALTINSTR_ENTRY(ft_flags) \
ALTINSTR_REPLACEMENT(newinstr)
-#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
- ALTERNATIVE(ALTERNATIVE(oldinstr, newinstr1, ft_flags1), newinstr2, ft_flags2)
+#define __ALTERNATIVE_2(oldinstr, \
+ newinstr1, ft_flags1, \
+ newinstr2, ft_flags2) \
+ __ALTERNATIVE("\n" \
+ __ALTERNATIVE(oldinstr, \
+ newinstr1, ft_flags1), \
+ newinstr2, ft_flags2)
+
+#define __ALTERNATIVE_3(oldinstr, \
+ newinstr1, ft_flags1, \
+ newinstr2, ft_flags2, \
+ newinstr3, ft_flags3) \
+ __ALTERNATIVE("\n" \
+ __ALTERNATIVE_2(oldinstr, \
+ newinstr1, ft_flags1, \
+ newinstr2, ft_flags2), \
+ newinstr3, ft_flags3)
+
+
+#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
+ "\n# <ALTERNATIVE>\n" \
+ __ALTERNATIVE("\t" oldinstr, newinstr, ft_flags) \
+ "\n# </ALTERNATIVE>\n"
+
+#define ALTERNATIVE_2(oldinstr, \
+ newinstr1, ft_flags1, \
+ newinstr2, ft_flags2) \
+ "\n# <ALTERNATIVE_2>\n" \
+ __ALTERNATIVE_2("\t" \
+ oldinstr, \
+ newinstr1, ft_flags1, \
+ newinstr2, ft_flags2) \
+ "\n# </ALTERNATIVE_2>\n"
+
+#define ALTERNATIVE_3(oldinstr, \
+ newinstr1, ft_flags1, \
+ newinstr2, ft_flags2, \
+ newinstr3, ft_flags3) \
+ "\n# <ALTERNATIVE_3>\n" \
+ __ALTERNATIVE_3("\t" \
+ oldinstr, \
+ newinstr1, ft_flags1, \
+ newinstr2, ft_flags2, \
+ newinstr3, ft_flags3) \
+ "\n# </ALTERNATIVE_3>\n"
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, newinstr_yes, ft_flags)
-#define ALTERNATIVE_3(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, \
- newinstr3, ft_flags3) \
- ALTERNATIVE(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2), \
- newinstr3, ft_flags3)
-
/*
* Alternative instructions for different CPU types or capabilities.
*
--
2.49.0
On Tue, Apr 08, 2025 at 01:21:17AM -0700, Josh Poimboeuf wrote: > Improve the readability and compactness of alternatives code. > > --------------------- > ALTERNATIVE() before: > --------------------- > > # ALT: oldinstr > 771: > rep movsb > 772: > # ALT: padding > .skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90 > 773: > .pushsection .altinstructions,"a" > .long 771b - . > .long 774f - . > .4byte (((1 << 0) << 16) | ((18*32+ 4))) > .byte 773b-771b > .byte 775f-774f > .popsection > .pushsection .altinstr_replacement, "ax" > # ALT: replacement > 774: > call rep_movs_alternative > 775: > .popsection > > -------------------- > ALTERNATIVE() after: > -------------------- > > # <ALTERNATIVE> > 771: rep movsb > 772: .skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90 > 773: > # ALT ENTRY: > .pushsection .altinstructions,"a"; .long 771b - .; .long 774f - .; .4byte (((1 << 0) << 16) | ((18*32+ 4))); .byte 773b-771b; .byte 775f-774f; .popsection I find this very hard to read. I prefer the multi line form it had before. Other than that, I like the new layout. > # ALT REPLACEMENT: > .pushsection .altinstr_replacement, "ax" > 774: call rep_movs_alternative > 775: > .popsection > # </ALTERNATIVE>
On Wed, Apr 09, 2025 at 04:38:21PM +0200, Peter Zijlstra wrote: > On Tue, Apr 08, 2025 at 01:21:17AM -0700, Josh Poimboeuf wrote: > > Improve the readability and compactness of alternatives code. > > > > --------------------- > > ALTERNATIVE() before: > > --------------------- > > > > # ALT: oldinstr > > 771: > > rep movsb > > 772: > > # ALT: padding > > .skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90 > > 773: > > .pushsection .altinstructions,"a" > > .long 771b - . > > .long 774f - . > > .4byte (((1 << 0) << 16) | ((18*32+ 4))) > > .byte 773b-771b > > .byte 775f-774f > > .popsection > > .pushsection .altinstr_replacement, "ax" > > # ALT: replacement > > 774: > > call rep_movs_alternative > > 775: > > .popsection > > > > -------------------- > > ALTERNATIVE() after: > > -------------------- > > > > # <ALTERNATIVE> > > 771: rep movsb > > 772: .skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90 > > 773: > > # ALT ENTRY: > > .pushsection .altinstructions,"a"; .long 771b - .; .long 774f - .; .4byte (((1 << 0) << 16) | ((18*32+ 4))); .byte 773b-771b; .byte 775f-774f; .popsection > > I find this very hard to read. I prefer the multi line form it had > before. I actually think the compactness of putting the entry on a single line is really nice. We're usually more interested in reading the code *around* the alternatives, rather than the alternatives themselves. Making the alt entry a big sprawling mess, for what typically resolves to just a few instructions, makes that a LOT harder. And it's *really* bad for ALTERNATIVE_2() and ALTERNATIVE_3(). Also, 99% of the time, we're not going to be debugging the alternatives themselves. Spreading out the alt entry directives across multiple lines is way overkill. It wastes space and cognitive load. That's what the comments are for. All you really need to care about are the original instructions, the replacement instructions, and potentially what feature+flags. We could even print the feature by adding the '(((1 << 0) << 16) | ((18*32+ 4)))' to one of the comments. Or even better, we can just show the actual human readable feature: ALT_NOT(X86_FEATURE_FSRM). Something like: # <ALTERNATIVE> 771: rep movsb 772: .skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90 773: # ALT ENTRY: .pushsection .altinstructions,"a"; .long 771b - .; .long 774f - .; .4byte (((1 << 0) << 16) | ((18*32+ 4))); .byte 773b-771b; .byte 775f-774f; .popsection # ALT REPLACEMENT: ALT_NOT(X86_FEATURE_FSRM): .pushsection .altinstr_replacement, "ax" 774: call rep_movs_alternative 775: .popsection # </ALTERNATIVE> Then with all the relevant information in the comments, there's no reason to make the alt entry directives themselves very readable, right? Unless alternatives are broken and you're debugging it. Which should very much be the exception rather than the rule. One thing that does bother me slightly is the asymmetry of having the .pushsection and .popsection on the same line, whereas the replacement has them on separate lines. We could just put the .popsection on its own line: # <ALTERNATIVE> 771: rep movsb 772: .skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90 773: # ALT ENTRY: .pushsection .altinstructions,"a"; .long 771b - .; .long 774f - .; .4byte (((1 << 0) << 16) | ((18*32+ 4))); .byte 773b-771b; .byte 775f-774f .popsection # ALT REPLACEMENT: ALT_NOT(X86_FEATURE_FSRM): .pushsection .altinstr_replacement, "ax" 774: call rep_movs_alternative 775: .popsection # </ALTERNATIVE> -- Josh
On Wed, 9 Apr 2025 at 10:41, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> I actually think the compactness of putting the entry on a single line
> is really nice.
Yeah, I think the noise with size calculations in particular is stuff
that I never look at, and making it one long line is better than
multiple lines, I think.
That said, for some of the alternatives, it would be even nicer if we
could make the noise more compact by just hardcoding sizes. Our
generic alternatives macros do result in some *very* verbose output
that is entirely illegible to humans.
And yes, we need that generic case for most of them, since the
instruction size will depend on register choice and the modrm
addressing details etc.
But some of them are kind of pointless.
I did have something that just knew that 'clac'/'stac' were three-byte
instructions (and it was very obvious indeed back when we encoded them
as such, using the ".byte" syntax).
And that avoided a *lot* of noise in the alternatives section, and
removed the subsequent ".skip" part entirely because both sides would
just use the right size without any calculations.
I can't find that hacky patch of mine, and I didn't keep it around
because it *was* hacky, but maybe I'll try to resurrect a cleaner
version of it.
There may not be very many people who care, because yeah, it really
only shows up when looking at the compiler-generated assembler output,
and I agree that that isn't exactly _common_.
Linus
On Wed, Apr 09, 2025 at 11:02:29AM -0700, Linus Torvalds wrote: > On Wed, 9 Apr 2025 at 10:41, Josh Poimboeuf <jpoimboe@kernel.org> wrote: > > > > I actually think the compactness of putting the entry on a single line > > is really nice. > > Yeah, I think the noise with size calculations in particular is stuff > that I never look at, and making it one long line is better than > multiple lines, I think. We could just give up on trying to making the code itself readable, and instead put the code+features in comments, e.g.: # <ALTERNATIVE> # # ORIG: rep movsb # NEW: call rep_movs_alternative # ALT_NOT(X86_FEATURE_FSRM) # 771: rep movsb; 772: .skip -(((775f-774f)-(772b-771b)) > 0) * ((775f-774f)-(772b-771b)),0x90; 773: .pushsection .altinstructions,"a"; .long 771b - .; .long 774f - .; .4byte (((1 << 0) << 16) | ((18*32+ 4))); .byte 773b-771b; .byte 775f-774f; .popsection; .pushsection .altinstr_replacement, "ax"; 774: call rep_movs_alternative; 775: .popsection # </ALTERNATIVE> -- Josh
On Wed, Apr 09, 2025 at 11:02:29AM -0700, Linus Torvalds wrote:
> On Wed, 9 Apr 2025 at 10:41, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> >
> > I actually think the compactness of putting the entry on a single line
> > is really nice.
>
> Yeah, I think the noise with size calculations in particular is stuff
> that I never look at, and making it one long line is better than
> multiple lines, I think.
>
> That said, for some of the alternatives, it would be even nicer if we
> could make the noise more compact by just hardcoding sizes. Our
> generic alternatives macros do result in some *very* verbose output
> that is entirely illegible to humans.
What if we were to use a global asm() to define an alternative .macro
whenever "alternative.h" gets included?
e.g.:
asm(".macro alternative oldinstr, newinstr, ft_flags\n"
"771:\t\\oldinstr\n"
"772:\t.skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * "
"((" alt_rlen ")-(" alt_slen ")),0x90\n"
"773:\n"
".pushsection .altinstructions,\"a\"\n\t"
".long 771b - .\n\t" /* label */
".long 774f - .\n\t " /* new instruction */
".4byte \\ft_flags\n\t" /* feature + flags */
".byte " alt_total_slen "\n\t" /* source len */
".byte " alt_rlen "\n\t" /* replacement len */
".popsection\n"
".pushsection .altinstr_replacement, \"ax\"\n"
"774:\t\\newinstr\n"
"775:\n"
".popsection\n"
".endm");
#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
"alternative \"" oldinstr "\", \"" newinstr "\", \"" __stringify(ft_flags) "\"; "
Then it becomes extremely readable:
alternative "", "stac", "( 9*32+20)";
Of course the downside is the macro gets generated even if it's never
used.
--
Josh
On Wed, 9 Apr 2025 at 12:51, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> What if we were to use a global asm() to define an alternative .macro
> whenever "alternative.h" gets included?
Yeah, I wouldn't mind that, but I have this dim memory of us having
tried it at some point and it didn't work.
I think the issue was that the in-compiler assembler was not as
complete as the external one (ie not doing macros at all or something
like that).
Linus
On Wed, Apr 09, 2025 at 02:20:55PM -0700, Linus Torvalds wrote: > On Wed, 9 Apr 2025 at 12:51, Josh Poimboeuf <jpoimboe@kernel.org> wrote: > > > > What if we were to use a global asm() to define an alternative .macro > > whenever "alternative.h" gets included? > > Yeah, I wouldn't mind that, but I have this dim memory of us having > tried it at some point and it didn't work. > > I think the issue was that the in-compiler assembler was not as > complete as the external one (ie not doing macros at all or something > like that). It seems to work with GCC 14 and Clang 18 at least. I can try to find some old toolchains to test with. -- Josh
On Wed, Apr 09, 2025 at 02:27:28PM -0700, Josh Poimboeuf wrote: > On Wed, Apr 09, 2025 at 02:20:55PM -0700, Linus Torvalds wrote: > > On Wed, 9 Apr 2025 at 12:51, Josh Poimboeuf <jpoimboe@kernel.org> wrote: > > > > > > What if we were to use a global asm() to define an alternative .macro > > > whenever "alternative.h" gets included? > > > > Yeah, I wouldn't mind that, but I have this dim memory of us having > > tried it at some point and it didn't work. > > > > I think the issue was that the in-compiler assembler was not as > > complete as the external one (ie not doing macros at all or something > > like that). > > It seems to work with GCC 14 and Clang 18 at least. I can try to find > some old toolchains to test with. Actually, Clang *compiled*, but on closer inspection it's actually silently omitting the inline asm :-/ So yeah, that's not going to work. -- Josh
© 2016 - 2025 Red Hat, Inc.