When available use one of the non-serializing WRMSR variants (WRMSRNS
with or without an immediate operand specifying the MSR register) in
__wrmsrq().
For the safe/unsafe variants make __wrmsrq() a common base
function instead of duplicating the ALTERNATIVE*() macros. This
requires letting native_wrmsr() use native_wrmsrq() instead of
__wrmsrq(). While changing this, convert native_wrmsr() into an inline
function.
Replace the only call of wrmsrns() with the now equivalent call to
native_wrmsrq() and remove wrmsrns().
The paravirt case will be handled later.
Originally-by: Xin Li (Intel) <xin@zytor.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- new patch, partially taken from "[RFC PATCH v2 21/34] x86/msr: Utilize
the alternatives mechanism to write MSR" by Xin Li.
---
arch/x86/include/asm/fred.h | 2 +-
arch/x86/include/asm/msr.h | 144 +++++++++++++++++++++++++++---------
2 files changed, 110 insertions(+), 36 deletions(-)
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 12b34d5b2953..8ae4429e5401 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -101,7 +101,7 @@ static __always_inline void fred_update_rsp0(void)
unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
- wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
+ native_wrmsrq(MSR_IA32_FRED_RSP0, rsp0);
__this_cpu_write(fred_rsp0, rsp0);
}
}
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index cf5205300266..19ed780c2a09 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -7,11 +7,11 @@
#ifndef __ASSEMBLER__
#include <asm/asm.h>
-#include <asm/errno.h>
#include <asm/cpumask.h>
#include <uapi/asm/msr.h>
#include <asm/shared/msr.h>
+#include <linux/errno.h>
#include <linux/types.h>
#include <linux/percpu.h>
@@ -56,6 +56,36 @@ static inline void do_trace_read_msr(u32 msr, u64 val, int failed) {}
static inline void do_trace_rdpmc(u32 msr, u64 val, int failed) {}
#endif
+/* The GNU Assembler (Gas) with Binutils 2.40 adds WRMSRNS support */
+#if defined(CONFIG_AS_IS_GNU) && CONFIG_AS_VERSION >= 24000
+#define ASM_WRMSRNS "wrmsrns"
+#else
+#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
+#endif
+
+/* The GNU Assembler (Gas) with Binutils 2.41 adds the .insn directive support */
+#if defined(CONFIG_AS_IS_GNU) && CONFIG_AS_VERSION >= 24100
+#define ASM_WRMSRNS_IMM \
+ " .insn VEX.128.F3.M7.W0 0xf6 /0, %[val], %[msr]%{:u32}\n\t"
+#else
+/*
+ * Note, clang also doesn't support the .insn directive.
+ *
+ * The register operand is encoded as %rax because all uses of the immediate
+ * form MSR access instructions reference %rax as the register operand.
+ */
+#define ASM_WRMSRNS_IMM \
+ " .byte 0xc4,0xe7,0x7a,0xf6,0xc0; .long %c[msr]"
+#endif
+
+#define PREPARE_RDX_FOR_WRMSR \
+ "mov %%rax, %%rdx\n\t" \
+ "shr $0x20, %%rdx\n\t"
+
+#define PREPARE_RCX_RDX_FOR_WRMSR \
+ "mov %[msr], %%ecx\n\t" \
+ PREPARE_RDX_FOR_WRMSR
+
/*
* Called only from an MSR fault handler, the instruction pointer points to
* the MSR access instruction that caused the fault.
@@ -93,12 +123,76 @@ static __always_inline u64 __rdmsr(u32 msr)
return EAX_EDX_VAL(val, low, high);
}
-static __always_inline void __wrmsrq(u32 msr, u64 val)
+static __always_inline bool __wrmsrq_variable(u32 msr, u64 val, int type)
{
- asm volatile("1: wrmsr\n"
- "2:\n"
- _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
- : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)) : "memory");
+#ifdef CONFIG_X86_64
+ BUILD_BUG_ON(__builtin_constant_p(msr));
+#endif
+
+ /*
+ * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
+ * DS prefix to avoid a trailing NOP.
+ */
+ asm_inline volatile goto(
+ "1:\n"
+ ALTERNATIVE("ds wrmsr",
+ ASM_WRMSRNS,
+ X86_FEATURE_WRMSRNS)
+ _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type])
+
+ :
+ : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)), [type] "i" (type)
+ : "memory"
+ : badmsr);
+
+ return false;
+
+badmsr:
+ return true;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Non-serializing WRMSR or its immediate form, when available.
+ *
+ * Otherwise, it falls back to a serializing WRMSR.
+ */
+static __always_inline bool __wrmsrq_constant(u32 msr, u64 val, int type)
+{
+ BUILD_BUG_ON(!__builtin_constant_p(msr));
+
+ asm_inline volatile goto(
+ "1:\n"
+ ALTERNATIVE_2(PREPARE_RCX_RDX_FOR_WRMSR
+ "2: ds wrmsr",
+ PREPARE_RCX_RDX_FOR_WRMSR
+ ASM_WRMSRNS,
+ X86_FEATURE_WRMSRNS,
+ ASM_WRMSRNS_IMM,
+ X86_FEATURE_MSR_IMM)
+ _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS immediate */
+ _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
+
+ :
+ : [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
+ : "memory", "ecx", "rdx"
+ : badmsr);
+
+ return false;
+
+badmsr:
+ return true;
+}
+#endif
+
+static __always_inline bool __wrmsrq(u32 msr, u64 val, int type)
+{
+#ifdef CONFIG_X86_64
+ if (__builtin_constant_p(msr))
+ return __wrmsrq_constant(msr, val, type);
+#endif
+
+ return __wrmsrq_variable(msr, val, type);
}
#define native_rdmsr(msr, val1, val2) \
@@ -113,11 +207,15 @@ static __always_inline u64 native_rdmsrq(u32 msr)
return __rdmsr(msr);
}
-#define native_wrmsr(msr, low, high) \
- __wrmsrq((msr), (u64)(high) << 32 | (low))
+static __always_inline void native_wrmsrq(u32 msr, u64 val)
+{
+ __wrmsrq(msr, val, EX_TYPE_WRMSR);
+}
-#define native_wrmsrq(msr, val) \
- __wrmsrq((msr), (val))
+static __always_inline void native_wrmsr(u32 msr, u32 low, u32 high)
+{
+ native_wrmsrq(msr, (u64)high << 32 | low);
+}
static inline u64 native_read_msr(u32 msr)
{
@@ -149,15 +247,7 @@ static inline void notrace native_write_msr(u32 msr, u64 val)
/* Can be uninlined because referenced by paravirt */
static inline int notrace native_write_msr_safe(u32 msr, u64 val)
{
- int err;
-
- asm volatile("1: wrmsr ; xor %[err],%[err]\n"
- "2:\n\t"
- _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err])
- : [err] "=a" (err)
- : "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32))
- : "memory");
- return err;
+ return __wrmsrq(msr, val, EX_TYPE_WRMSR_SAFE) ? -EIO : 0;
}
extern int rdmsr_safe_regs(u32 regs[8]);
@@ -176,7 +266,6 @@ static inline u64 native_read_pmc(int counter)
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
-#include <linux/errno.h>
static __always_inline u64 read_msr(u32 msr)
{
return native_read_msr(msr);
@@ -268,21 +357,6 @@ static inline int wrmsrq_safe(u32 msr, u64 val)
return err;
}
-/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
-#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
-
-/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
-static __always_inline void wrmsrns(u32 msr, u64 val)
-{
- /*
- * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
- * DS prefix to avoid a trailing NOP.
- */
- asm volatile("1: " ALTERNATIVE("ds wrmsr", ASM_WRMSRNS, X86_FEATURE_WRMSRNS)
- "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
- : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
-}
-
static inline void wrmsr(u32 msr, u32 low, u32 high)
{
wrmsrq(msr, (u64)high << 32 | low);
--
2.51.0
On Tue, Sep 30, 2025, Juergen Gross wrote:
> When available use one of the non-serializing WRMSR variants (WRMSRNS
> with or without an immediate operand specifying the MSR register) in
> __wrmsrq().
>
> For the safe/unsafe variants make __wrmsrq() to be a common base
> function instead of duplicating the ALTERNATIVE*() macros. This
> requires to let native_wrmsr() use native_wrmsrq() instead of
> __wrmsrq(). While changing this, convert native_wrmsr() into an inline
> function.
>
> Replace the only call of wsrmsrns() with the now equivalent call to
> native_wrmsrq() and remove wsrmsrns().
>
> The paravirt case will be handled later.
...
> @@ -268,21 +357,6 @@ static inline int wrmsrq_safe(u32 msr, u64 val)
> return err;
> }
>
> -/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
> -#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
> -
> -/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
> -static __always_inline void wrmsrns(u32 msr, u64 val)
FYI, a use of wrmsrns() is likely coming in through the KVM (x86) tree, commit
65391feb042b ("KVM: VMX: Add host MSR read/write helpers to consolidate preemption
handling").
Probably makes sense to spin v3 after the merge window? Or on linux-next? (I
can't tell what was used as the base, and I double-checked that the above commit
is in linux-next).
> -{
> - /*
> - * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
> - * DS prefix to avoid a trailing NOP.
> - */
> - asm volatile("1: " ALTERNATIVE("ds wrmsr", ASM_WRMSRNS, X86_FEATURE_WRMSRNS)
> - "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
> - : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
> -}
> -
> static inline void wrmsr(u32 msr, u32 low, u32 high)
> {
> wrmsrq(msr, (u64)high << 32 | low);
> --
> 2.51.0
>
On 30.09.25 18:00, Sean Christopherson wrote:
> On Tue, Sep 30, 2025, Juergen Gross wrote:
>> When available use one of the non-serializing WRMSR variants (WRMSRNS
>> with or without an immediate operand specifying the MSR register) in
>> __wrmsrq().
>>
>> For the safe/unsafe variants make __wrmsrq() to be a common base
>> function instead of duplicating the ALTERNATIVE*() macros. This
>> requires to let native_wrmsr() use native_wrmsrq() instead of
>> __wrmsrq(). While changing this, convert native_wrmsr() into an inline
>> function.
>>
>> Replace the only call of wsrmsrns() with the now equivalent call to
>> native_wrmsrq() and remove wsrmsrns().
>>
>> The paravirt case will be handled later.
>
> ...
>
>> @@ -268,21 +357,6 @@ static inline int wrmsrq_safe(u32 msr, u64 val)
>> return err;
>> }
>>
>> -/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
>> -#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
>> -
>> -/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
>> -static __always_inline void wrmsrns(u32 msr, u64 val)
>
> FYI, a use of wrmsrns() is likely coming in through the KVM (x86) tree, commit
> 65391feb042b ("KVM: VMX: Add host MSR read/write helpers to consolidate preemption
> handling").
Thanks for the heads up!
>
> Probably makes sense to spin v3 after the merge window? Or on linux-next? (I
> can't tell what was used as the base, and I double-checked that the above commit
> is in linux-next).
I'll find a proper solution. :-)
Juergen
On Tue, Sep 30, 2025 at 09:03:53AM +0200, Juergen Gross wrote:
> +static __always_inline bool __wrmsrq_constant(u32 msr, u64 val, int type)
> +{
> + BUILD_BUG_ON(!__builtin_constant_p(msr));
> +
> + asm_inline volatile goto(
> + "1:\n"
> + ALTERNATIVE_2(PREPARE_RCX_RDX_FOR_WRMSR
> + "2: ds wrmsr",
> + PREPARE_RCX_RDX_FOR_WRMSR
> + ASM_WRMSRNS,
> + X86_FEATURE_WRMSRNS,
> + ASM_WRMSRNS_IMM,
> + X86_FEATURE_MSR_IMM)
> + _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS immediate */
> + _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
> +
> + :
> + : [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
> + : "memory", "ecx", "rdx"
> + : badmsr);
> +
> + return false;
> +
> +badmsr:
> + return true;
> +}
Just wondering, would something like this work?
asm_inline volatile goto(
"1:\n"
ALTERNATIVE(PREPARE_RCX_RDX_FOR_WRMSR
"2:\n"
ALTERNATIVE("ds wrmsr",
ASM_WRMSRNS, X86_FEATURE_WRMSRNS),
ASM_WRMSRNS_IMM, X86_FEATURE_MSR_IMM);
_ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS immediate */
_ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
:
: [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
: "memory", "ecx", "rdx"
: badmsr);
Its a bit weird because the nested alternative isn't for the exact same
position I suppose. But I find it a more readable form.
On 30.09.25 10:31, Peter Zijlstra wrote:
> On Tue, Sep 30, 2025 at 09:03:53AM +0200, Juergen Gross wrote:
>
>> +static __always_inline bool __wrmsrq_constant(u32 msr, u64 val, int type)
>> +{
>> + BUILD_BUG_ON(!__builtin_constant_p(msr));
>> +
>> + asm_inline volatile goto(
>> + "1:\n"
>> + ALTERNATIVE_2(PREPARE_RCX_RDX_FOR_WRMSR
>> + "2: ds wrmsr",
>> + PREPARE_RCX_RDX_FOR_WRMSR
>> + ASM_WRMSRNS,
>> + X86_FEATURE_WRMSRNS,
>> + ASM_WRMSRNS_IMM,
>> + X86_FEATURE_MSR_IMM)
>> + _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS immediate */
>> + _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
>> +
>> + :
>> + : [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
>> + : "memory", "ecx", "rdx"
>> + : badmsr);
>> +
>> + return false;
>> +
>> +badmsr:
>> + return true;
>> +}
>
> Just wondering, would something this work?
>
> asm_inline volatile goto(
> "1:\n"
> ALTERNATIVE(PREPARE_RCX_RDX_FOR_WRMSR
> "2:\n"
> ALTERNATIVE("ds wrmsr",
> ASM_WRMSRNS, X86_FEATURE_WRMSRNS),
> ASM_WRMSRNS_IMM, X86_FEATURE_MSR_IMM);
> _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS immediate */
> _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
>
> :
> : [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
> : "memory", "ecx", "rdx"
> : badmsr);
>
> Its a bit weird because the nested alternative isn't for the exact same
> position I suppose. But I find it a more readable form.
I don't think it would work. Nested ALTERNATIVE()s do work only with
all of them starting at the same location. Have a look at the
ALTERNATIVE() macro, which is defining the label "771" via OLDINSTR()
and then referring to this label via ALTINSTR_ENTRY(). In your case
the ALTINSTR_ENTRY() of the outer ALTERNATIVE() invocation would find
the wrong "771" label (the one of the inner ALTERNATIVE()).
Allowing such constructs would probably require switching from preprocessor
macros to assembler macros.
Juergen
On 30.09.25 10:46, Jürgen Groß wrote:
> On 30.09.25 10:31, Peter Zijlstra wrote:
>> On Tue, Sep 30, 2025 at 09:03:53AM +0200, Juergen Gross wrote:
>>
>>> +static __always_inline bool __wrmsrq_constant(u32 msr, u64 val, int type)
>>> +{
>>> + BUILD_BUG_ON(!__builtin_constant_p(msr));
>>> +
>>> + asm_inline volatile goto(
>>> + "1:\n"
>>> + ALTERNATIVE_2(PREPARE_RCX_RDX_FOR_WRMSR
>>> + "2: ds wrmsr",
>>> + PREPARE_RCX_RDX_FOR_WRMSR
>>> + ASM_WRMSRNS,
>>> + X86_FEATURE_WRMSRNS,
>>> + ASM_WRMSRNS_IMM,
>>> + X86_FEATURE_MSR_IMM)
>>> + _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS
>>> immediate */
>>> + _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
>>> +
>>> + :
>>> + : [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
>>> + : "memory", "ecx", "rdx"
>>> + : badmsr);
>>> +
>>> + return false;
>>> +
>>> +badmsr:
>>> + return true;
>>> +}
>>
>> Just wondering, would something this work?
>>
>> asm_inline volatile goto(
>> "1:\n"
>> ALTERNATIVE(PREPARE_RCX_RDX_FOR_WRMSR
>> "2:\n"
>> ALTERNATIVE("ds wrmsr",
>> ASM_WRMSRNS, X86_FEATURE_WRMSRNS),
>> ASM_WRMSRNS_IMM, X86_FEATURE_MSR_IMM);
>> _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS
>> immediate */
>> _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
>>
>> :
>> : [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
>> : "memory", "ecx", "rdx"
>> : badmsr);
>>
>> Its a bit weird because the nested alternative isn't for the exact same
>> position I suppose. But I find it a more readable form.
>
> I don't think it would work. Nested ALTERNATIVE()s do work only with
> all of them starting at the same location. Have a look at the
> ALTERNATIVE() macro, which is defining the label "771" via OLDINSTR()
> and then referring to this label via ALTINSTR_ENTRY(). In your case
> the ALTINSTR_ENTRY() of the outer ALTERNATIVE() invocation would find
> the wrong "771" label (the one of the inner ALTERNATIVE()).
>
> Allowing such constructs would probably require switching from preprocessor
> macros to assembler macros.
Thinking more about that I believe there are additional problems:
Having overlapping alternatives not starting at the same address will result
in problems with length padding of the outer alternative in case the inner
one starting later is extending past the end of the outer one. This might be
possible to handle, but it will be tedious.
A similar problem occurs with my recent series for merging nested alternative
patching into a temporary buffer. Currently the code relies on all nested
alternatives to start at the same location.
Using your idea with pv_ops could result in the inner alternative not being
at the start of the outer alternative AND being not the initial code. This
would result in patching in the .altinstructions area instead of the normal
.text site, resulting in possible loss of a patching action if the patched
area would have been used as a replacement before.
So in my opinion allowing alternatives to nest without all inner levels
starting at the same location as the outermost level would be a recipe for
failure.
I think I'll write another patch to BUG() in case such a situation is
detected.
Juergen
On Wed, Oct 01, 2025 at 10:49:31AM +0200, Juergen Gross wrote: > Thinking more about that I believe there are additional problems: > > Having overlapping alternatives not starting at the same address will result > in problems with length padding of the outer alternative in case the inner > one starting later is extending past the end of the outer one. This might be > possible to handle, but it will be tedious. Yes, this must not happen. > Using your idea with pv_ops could result in the inner alternative not being > at the start of the outer alternative AND being not the initial code. This > would result in patching in the .altinstructions area instead of the normal > .text site, resulting in possible loss of a patching action if the patched > area would have been used as a replacement before. Not quite, the nested alternative was in the orig_insn part. So it would result in patching the orig text twice, once from the inner (which comes first in the patch list) and then once again from the outer (which comes later). > So in my opinion allowing alternatives to nest without all inner levels > starting at the same location as the outermost level would be a receipt for > failure. Certainly tricky, no argument there. > I think I'll write another patch to BUG() in case such a situation is being > detected. Fair enough; we should not currently have any such cases. And going by my attempt to make it work, its going to be really tricky in any case. But please put on a comment on why, which explains the constraints.
On 01.10.25 12:50, Peter Zijlstra wrote: > On Wed, Oct 01, 2025 at 10:49:31AM +0200, Juergen Gross wrote: > >> Thinking more about that I believe there are additional problems: >> >> Having overlapping alternatives not starting at the same address will result >> in problems with length padding of the outer alternative in case the inner >> one starting later is extending past the end of the outer one. This might be >> possible to handle, but it will be tedious. > > Yes, this must not happen. > >> Using your idea with pv_ops could result in the inner alternative not being >> at the start of the outer alternative AND being not the initial code. This >> would result in patching in the .altinstructions area instead of the normal >> .text site, resulting in possible loss of a patching action if the patched >> area would have been used as a replacement before. > > Not quite, the nested alternative was in the orig_insn part. So it would > result in patching the orig text twice, once from the inner (which comes > first in the patch list) and then once again from the outer (which comes > later). Yes, but that was the native case only. With pv_ops this would mean the original instruction would be the paravirt-call, resulting in your construct becoming an inner nesting level. > >> So in my opinion allowing alternatives to nest without all inner levels >> starting at the same location as the outermost level would be a receipt for >> failure. > > Certainly tricky, no argument there. > >> I think I'll write another patch to BUG() in case such a situation is being >> detected. > > Fair enough; we should not currently have any such cases. And going by > my attempt to make it work, its going to be really tricky in any case. > > But please put on a comment on why, which explains the constraints. Agreed. Juergen
On Tue, Sep 30, 2025 at 10:46:23AM +0200, Jürgen Groß wrote:
> On 30.09.25 10:31, Peter Zijlstra wrote:
> > On Tue, Sep 30, 2025 at 09:03:53AM +0200, Juergen Gross wrote:
> >
> > > +static __always_inline bool __wrmsrq_constant(u32 msr, u64 val, int type)
> > > +{
> > > + BUILD_BUG_ON(!__builtin_constant_p(msr));
> > > +
> > > + asm_inline volatile goto(
> > > + "1:\n"
> > > + ALTERNATIVE_2(PREPARE_RCX_RDX_FOR_WRMSR
> > > + "2: ds wrmsr",
> > > + PREPARE_RCX_RDX_FOR_WRMSR
> > > + ASM_WRMSRNS,
> > > + X86_FEATURE_WRMSRNS,
> > > + ASM_WRMSRNS_IMM,
> > > + X86_FEATURE_MSR_IMM)
> > > + _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS immediate */
> > > + _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
> > > +
> > > + :
> > > + : [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
> > > + : "memory", "ecx", "rdx"
> > > + : badmsr);
> > > +
> > > + return false;
> > > +
> > > +badmsr:
> > > + return true;
> > > +}
> >
> > Just wondering, would something this work?
> >
> > asm_inline volatile goto(
> > "1:\n"
> > ALTERNATIVE(PREPARE_RCX_RDX_FOR_WRMSR
> > "2:\n"
> > ALTERNATIVE("ds wrmsr",
> > ASM_WRMSRNS, X86_FEATURE_WRMSRNS),
> > ASM_WRMSRNS_IMM, X86_FEATURE_MSR_IMM);
> > _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS immediate */
> > _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
> >
> > :
> > : [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
> > : "memory", "ecx", "rdx"
> > : badmsr);
> >
> > Its a bit weird because the nested alternative isn't for the exact same
> > position I suppose. But I find it a more readable form.
>
> I don't think it would work. Nested ALTERNATIVE()s do work only with
> all of them starting at the same location. Have a look at the
> ALTERNATIVE() macro, which is defining the label "771" via OLDINSTR()
> and then referring to this label via ALTINSTR_ENTRY(). In your case
> the ALTINSTR_ENTRY() of the outer ALTERNATIVE() invocation would find
> the wrong "771" label (the one of the inner ALTERNATIVE()).
>
> Allowing such constructs would probably require switching from preprocessor
> macros to assembler macros.
Right, I was looking at the asm macros.
As long as the inner comes first in the apply list it should all just
work, except you get the double patching back.
Using the asm macros isn't going to make it more readable though.
Oh well, lets forget about this :-)
On Tue, Sep 30, 2025 at 10:50:44AM +0200, Peter Zijlstra wrote:
> On Tue, Sep 30, 2025 at 10:46:23AM +0200, Jürgen Groß wrote:
> > > asm_inline volatile goto(
> > > "1:\n"
> > > ALTERNATIVE(PREPARE_RCX_RDX_FOR_WRMSR
> > > "2:\n"
> > > ALTERNATIVE("ds wrmsr",
> > > ASM_WRMSRNS, X86_FEATURE_WRMSRNS),
> > > ASM_WRMSRNS_IMM, X86_FEATURE_MSR_IMM);
> > > _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS immediate */
> > > _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
> > >
> > > :
> > > : [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
> > > : "memory", "ecx", "rdx"
> > > : badmsr);
> > >
> Oh well, lets forget about this :-)
So I couldn't. I tried the below, which when building a .i generates the
following:
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((no_instrument_function)) void clear_page(void *page)
{
kmsan_unpoison_memory(page, ((1UL) << 12));
asm __inline volatile(
"# ALT: oldinstr\n"
"__UNIQUE_ID_altinstr_9" "_begin:\n\t"
"# ALT: oldinstr\n"
"__UNIQUE_ID_altinstr_8" "_begin:\n\t"
"call %c[old]" "\n"
"__UNIQUE_ID_altinstr_8" "_pad:\n"
"# ALT: padding\n"
".skip -(((" "__UNIQUE_ID_altinstr_8" "_alt_end - " "__UNIQUE_ID_altinstr_8" "_alt_begin" ")-(" "__UNIQUE_ID_altinstr_8" "_pad - " "__UNIQUE_ID_altinstr_8" "_begin" ")) > 0) * "
"((" "__UNIQUE_ID_altinstr_8" "_alt_end - " "__UNIQUE_ID_altinstr_8" "_alt_begin" ")-(" "__UNIQUE_ID_altinstr_8" "_pad - " "__UNIQUE_ID_altinstr_8" "_begin" ")),0x90\n"
"__UNIQUE_ID_altinstr_8" "_end:\n"
".pushsection .altinstructions,\"a\"\n"
" .long " "__UNIQUE_ID_altinstr_8" "_begin - .\n"
" .long " "__UNIQUE_ID_altinstr_8" "_alt_begin - .\n"
" .4byte " "( 3*32+16)" "\n"
" .byte " "__UNIQUE_ID_altinstr_8" "_end - " "__UNIQUE_ID_altinstr_8" "_begin" "\n"
" .byte " "__UNIQUE_ID_altinstr_8" "_alt_end - " "__UNIQUE_ID_altinstr_8" "_alt_begin" "\n"
".popsection\n"
".pushsection .altinstr_replacement, \"ax\"\n"
"# ALT: replacement\n"
"__UNIQUE_ID_altinstr_8" "_alt_begin:\n\t"
"call %c[new1]" "\n"
"__UNIQUE_ID_altinstr_8" "_alt_end:\n"
".popsection\n"
"\n"
"__UNIQUE_ID_altinstr_9" "_pad:\n"
"# ALT: padding\n"
".skip -(((" "__UNIQUE_ID_altinstr_9" "_alt_end - " "__UNIQUE_ID_altinstr_9" "_alt_begin" ")-(" "__UNIQUE_ID_altinstr_9" "_pad - " "__UNIQUE_ID_altinstr_9" "_begin" ")) > 0) * "
"((" "__UNIQUE_ID_altinstr_9" "_alt_end - " "__UNIQUE_ID_altinstr_9" "_alt_begin" ")-(" "__UNIQUE_ID_altinstr_9" "_pad - " "__UNIQUE_ID_altinstr_9" "_begin" ")),0x90\n"
"__UNIQUE_ID_altinstr_9" "_end:\n"
".pushsection .altinstructions,\"a\"\n"
" .long " "__UNIQUE_ID_altinstr_9" "_begin - .\n"
" .long " "__UNIQUE_ID_altinstr_9" "_alt_begin - .\n"
" .4byte " "( 9*32+ 9)" "\n"
" .byte " "__UNIQUE_ID_altinstr_9" "_end - " "__UNIQUE_ID_altinstr_9" "_begin" "\n"
" .byte " "__UNIQUE_ID_altinstr_9" "_alt_end - " "__UNIQUE_ID_altinstr_9" "_alt_begin" "\n"
".popsection\n"
".pushsection .altinstr_replacement, \"ax\"\n"
"# ALT: replacement\n"
"__UNIQUE_ID_altinstr_9" "_alt_begin:\n\t"
"call %c[new2]" "\n"
"__UNIQUE_ID_altinstr_9" "_alt_end:\n"
".popsection\n"
: "+r" (current_stack_pointer), "=D" (page)
: [old] "i" (clear_page_orig), [new1] "i" (clear_page_rep), [new2] "i" (clear_page_erms) , "D" (page)
: "cc", "memory", "rax", "rcx")
;
}
Which looks right, but utterly fails to build :(
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 15bc07a5ebb3..12a93982457a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/stringify.h>
#include <linux/objtool.h>
+#include <linux/compiler.h>
#include <asm/asm.h>
#include <asm/bug.h>
@@ -184,38 +185,41 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define ALT_CALL_INSTR "call BUG_func"
-#define alt_slen "772b-771b"
-#define alt_total_slen "773b-771b"
-#define alt_rlen "775f-774f"
+#define alt_slen(pfx) #pfx "_pad - " #pfx "_begin"
+#define alt_total_slen(pfx) #pfx "_end - " #pfx "_begin"
+#define alt_rlen(pfx) #pfx "_alt_end - " #pfx "_alt_begin"
-#define OLDINSTR(oldinstr) \
+#define OLDINSTR(oldinstr, pfx) \
"# ALT: oldinstr\n" \
- "771:\n\t" oldinstr "\n772:\n" \
+ #pfx "_begin:\n\t" oldinstr "\n" #pfx "_pad:\n" \
"# ALT: padding\n" \
- ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
- "((" alt_rlen ")-(" alt_slen ")),0x90\n" \
- "773:\n"
+ ".skip -(((" alt_rlen(pfx) ")-(" alt_slen(pfx) ")) > 0) * " \
+ "((" alt_rlen(pfx) ")-(" alt_slen(pfx) ")),0x90\n" \
+ #pfx "_end:\n"
-#define ALTINSTR_ENTRY(ft_flags) \
+#define ALTINSTR_ENTRY(ft_flags, pfx) \
".pushsection .altinstructions,\"a\"\n" \
- " .long 771b - .\n" /* label */ \
- " .long 774f - .\n" /* new instruction */ \
+ " .long " #pfx "_begin - .\n" /* label */ \
+ " .long " #pfx "_alt_begin - .\n" /* new instruction */ \
" .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
- " .byte " alt_total_slen "\n" /* source len */ \
- " .byte " alt_rlen "\n" /* replacement len */ \
+ " .byte " alt_total_slen(pfx) "\n" /* source len */ \
+ " .byte " alt_rlen(pfx) "\n" /* replacement len */ \
".popsection\n"
-#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \
+#define ALTINSTR_REPLACEMENT(newinstr, pfx) /* replacement */ \
".pushsection .altinstr_replacement, \"ax\"\n" \
"# ALT: replacement\n" \
- "774:\n\t" newinstr "\n775:\n" \
+ #pfx "_alt_begin:\n\t" newinstr "\n" #pfx "_alt_end:\n" \
".popsection\n"
/* alternative assembly primitive: */
-#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
- OLDINSTR(oldinstr) \
- ALTINSTR_ENTRY(ft_flags) \
- ALTINSTR_REPLACEMENT(newinstr)
+#define __ALTERNATIVE(oldinstr, newinstr, ft_flags, pfx) \
+ OLDINSTR(oldinstr, pfx) \
+ ALTINSTR_ENTRY(ft_flags, pfx) \
+ ALTINSTR_REPLACEMENT(newinstr, pfx)
+
+#define ALTERNATIVE(oldinstr, newinstr, feat) \
+ __ALTERNATIVE(oldinstr, newinstr, feat, __UNIQUE_ID(altinstr))
#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
ALTERNATIVE(ALTERNATIVE(oldinstr, newinstr1, ft_flags1), newinstr2, ft_flags2)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 64ff73c533e5..d79552a61fda 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -163,7 +163,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
__asm__ ("" : "=r" (var) : "0" (var))
#endif
-#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__PASTE(__UNIQUE_ID_, prefix), _), __COUNTER__)
/**
* data_race - mark an expression as containing intentional data races
On 30.09.25 14:51, Peter Zijlstra wrote:
> On Tue, Sep 30, 2025 at 10:50:44AM +0200, Peter Zijlstra wrote:
>> On Tue, Sep 30, 2025 at 10:46:23AM +0200, Jürgen Groß wrote:
>
>>>> asm_inline volatile goto(
>>>> "1:\n"
>>>> ALTERNATIVE(PREPARE_RCX_RDX_FOR_WRMSR
>>>> "2:\n"
>>>> ALTERNATIVE("ds wrmsr",
>>>> ASM_WRMSRNS, X86_FEATURE_WRMSRNS),
>>>> ASM_WRMSRNS_IMM, X86_FEATURE_MSR_IMM);
>>>> _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For WRMSRNS immediate */
>>>> _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For WRMSR(NS) */
>>>>
>>>> :
>>>> : [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
>>>> : "memory", "ecx", "rdx"
>>>> : badmsr);
>>>>
>
>> Oh well, lets forget about this :-)
>
> So I couldn't. I tried the below, which when building a .i generates the
> following:
>
>
> static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((no_instrument_function)) void clear_page(void *page)
> {
>
> kmsan_unpoison_memory(page, ((1UL) << 12));
> asm __inline volatile(
> "# ALT: oldinstr\n"
> "__UNIQUE_ID_altinstr_9" "_begin:\n\t"
>
> "# ALT: oldinstr\n"
> "__UNIQUE_ID_altinstr_8" "_begin:\n\t"
> "call %c[old]" "\n"
> "__UNIQUE_ID_altinstr_8" "_pad:\n"
> "# ALT: padding\n"
> ".skip -(((" "__UNIQUE_ID_altinstr_8" "_alt_end - " "__UNIQUE_ID_altinstr_8" "_alt_begin" ")-(" "__UNIQUE_ID_altinstr_8" "_pad - " "__UNIQUE_ID_altinstr_8" "_begin" ")) > 0) * "
> "((" "__UNIQUE_ID_altinstr_8" "_alt_end - " "__UNIQUE_ID_altinstr_8" "_alt_begin" ")-(" "__UNIQUE_ID_altinstr_8" "_pad - " "__UNIQUE_ID_altinstr_8" "_begin" ")),0x90\n"
> "__UNIQUE_ID_altinstr_8" "_end:\n"
> ".pushsection .altinstructions,\"a\"\n"
> " .long " "__UNIQUE_ID_altinstr_8" "_begin - .\n"
> " .long " "__UNIQUE_ID_altinstr_8" "_alt_begin - .\n"
> " .4byte " "( 3*32+16)" "\n"
> " .byte " "__UNIQUE_ID_altinstr_8" "_end - " "__UNIQUE_ID_altinstr_8" "_begin" "\n"
> " .byte " "__UNIQUE_ID_altinstr_8" "_alt_end - " "__UNIQUE_ID_altinstr_8" "_alt_begin" "\n"
> ".popsection\n"
> ".pushsection .altinstr_replacement, \"ax\"\n"
> "# ALT: replacement\n"
> "__UNIQUE_ID_altinstr_8" "_alt_begin:\n\t"
> "call %c[new1]" "\n"
> "__UNIQUE_ID_altinstr_8" "_alt_end:\n"
> ".popsection\n"
> "\n"
>
> "__UNIQUE_ID_altinstr_9" "_pad:\n"
> "# ALT: padding\n"
> ".skip -(((" "__UNIQUE_ID_altinstr_9" "_alt_end - " "__UNIQUE_ID_altinstr_9" "_alt_begin" ")-(" "__UNIQUE_ID_altinstr_9" "_pad - " "__UNIQUE_ID_altinstr_9" "_begin" ")) > 0) * "
> "((" "__UNIQUE_ID_altinstr_9" "_alt_end - " "__UNIQUE_ID_altinstr_9" "_alt_begin" ")-(" "__UNIQUE_ID_altinstr_9" "_pad - " "__UNIQUE_ID_altinstr_9" "_begin" ")),0x90\n"
> "__UNIQUE_ID_altinstr_9" "_end:\n"
> ".pushsection .altinstructions,\"a\"\n"
> " .long " "__UNIQUE_ID_altinstr_9" "_begin - .\n"
> " .long " "__UNIQUE_ID_altinstr_9" "_alt_begin - .\n"
> " .4byte " "( 9*32+ 9)" "\n"
> " .byte " "__UNIQUE_ID_altinstr_9" "_end - " "__UNIQUE_ID_altinstr_9" "_begin" "\n"
> " .byte " "__UNIQUE_ID_altinstr_9" "_alt_end - " "__UNIQUE_ID_altinstr_9" "_alt_begin" "\n"
> ".popsection\n"
> ".pushsection .altinstr_replacement, \"ax\"\n"
> "# ALT: replacement\n"
> "__UNIQUE_ID_altinstr_9" "_alt_begin:\n\t"
> "call %c[new2]" "\n"
> "__UNIQUE_ID_altinstr_9" "_alt_end:\n"
> ".popsection\n"
> : "+r" (current_stack_pointer), "=D" (page)
> : [old] "i" (clear_page_orig), [new1] "i" (clear_page_rep), [new2] "i" (clear_page_erms) , "D" (page)
> : "cc", "memory", "rax", "rcx")
>
> ;
> }
>
> Which looks right, but utterly fails to build :(
What does the failure look like?
Could it be that the labels should be local ones?
Juergen
On Tue, Sep 30, 2025 at 05:42:13PM +0200, Jürgen Groß wrote:
> Could it be that the labels should be local ones?
I already tried 's/#pfx/".L" #pfx/g' and that made no difference.
> What does the failure look like?
It's complaining about label re-definitions.
$ make O=defconfig-build/ arch/x86/kernel/alternative.o
../arch/x86/include/asm/cpufeature.h: Assembler messages:
../arch/x86/include/asm/cpufeature.h:102: Error: symbol `.L__UNIQUE_ID_altinstr_67_begin' is already defined
../arch/x86/include/asm/cpufeature.h:104: Error: symbol `.L__UNIQUE_ID_altinstr_66_begin' is already defined
../arch/x86/include/asm/cpufeature.h:106: Error: symbol `.L__UNIQUE_ID_altinstr_66_pad' is already defined
../arch/x86/include/asm/cpufeature.h:109: Error: symbol `.L__UNIQUE_ID_altinstr_66_end' is already defined
../arch/x86/include/asm/cpufeature.h:119: Error: symbol `.L__UNIQUE_ID_altinstr_66_alt_begin' is already defined
../arch/x86/include/asm/cpufeature.h:121: Error: symbol `.L__UNIQUE_ID_altinstr_66_alt_end' is already defined
../arch/x86/include/asm/cpufeature.h:124: Error: symbol `.L__UNIQUE_ID_altinstr_67_pad' is already defined
../arch/x86/include/asm/cpufeature.h:127: Error: symbol `.L__UNIQUE_ID_altinstr_67_end' is already defined
../arch/x86/include/asm/cpufeature.h:137: Error: symbol `.L__UNIQUE_ID_altinstr_67_alt_begin' is already defined
../arch/x86/include/asm/cpufeature.h:139: Error: symbol `.L__UNIQUE_ID_altinstr_67_alt_end' is already defined
../arch/x86/include/asm/cpufeature.h:102: Error: symbol `.L__UNIQUE_ID_altinstr_67_begin' is already defined
../arch/x86/include/asm/cpufeature.h:104: Error: symbol `.L__UNIQUE_ID_altinstr_66_begin' is already defined
../arch/x86/include/asm/cpufeature.h:106: Error: symbol `.L__UNIQUE_ID_altinstr_66_pad' is already defined
../arch/x86/include/asm/cpufeature.h:109: Error: symbol `.L__UNIQUE_ID_altinstr_66_end' is already defined
../arch/x86/include/asm/cpufeature.h:119: Error: symbol `.L__UNIQUE_ID_altinstr_66_alt_begin' is already defined
../arch/x86/include/asm/cpufeature.h:121: Error: symbol `.L__UNIQUE_ID_altinstr_66_alt_end' is already defined
../arch/x86/include/asm/cpufeature.h:124: Error: symbol `.L__UNIQUE_ID_altinstr_67_pad' is already defined
../arch/x86/include/asm/cpufeature.h:127: Error: symbol `.L__UNIQUE_ID_altinstr_67_end' is already defined
../arch/x86/include/asm/cpufeature.h:137: Error: symbol `.L__UNIQUE_ID_altinstr_67_alt_begin' is already defined
../arch/x86/include/asm/cpufeature.h:139: Error: symbol `.L__UNIQUE_ID_altinstr_67_alt_end' is already defined
../arch/x86/include/asm/cpufeature.h:102: Error: symbol `.L__UNIQUE_ID_altinstr_67_begin' is already defined
../arch/x86/include/asm/cpufeature.h:104: Error: symbol `.L__UNIQUE_ID_altinstr_66_begin' is already defined
../arch/x86/include/asm/cpufeature.h:106: Error: symbol `.L__UNIQUE_ID_altinstr_66_pad' is already defined
../arch/x86/include/asm/cpufeature.h:109: Error: symbol `.L__UNIQUE_ID_altinstr_66_end' is already defined
../arch/x86/include/asm/cpufeature.h:119: Error: symbol `.L__UNIQUE_ID_altinstr_66_alt_begin' is already defined
../arch/x86/include/asm/cpufeature.h:121: Error: symbol `.L__UNIQUE_ID_altinstr_66_alt_end' is already defined
../arch/x86/include/asm/cpufeature.h:124: Error: symbol `.L__UNIQUE_ID_altinstr_67_pad' is already defined
../arch/x86/include/asm/cpufeature.h:127: Error: symbol `.L__UNIQUE_ID_altinstr_67_end' is already defined
...
That specific one is this:
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((no_instrument_function)) __attribute__((__always_inline__)) bool _static_cpu_has(u16 bit)
{
asm goto("# ALT: oldinstr\n" ".L" "__UNIQUE_ID_altinstr_67" "_begin:\n\t" "# ALT: oldinstr\n" ".L" "__UNIQUE_ID_altinstr_66" "_begin:\n\t" "jmp 6f" "\n" ".L" "__UNIQUE_ID_altinstr_66" "_pad:\n" "# ALT: padding\n" ".skip -(((" ".L" "__UNIQUE_ID_altinstr_66" "_alt_end - " ".L" "__UNIQUE_ID_altinstr_66" "_alt_begin" ")-(" ".L" "__UNIQUE_ID_altinstr_66" "_pad - " ".L" "__UNIQUE_ID_altinstr_66" "_begin" ")) > 0) * " "((" ".L" "__UNIQUE_ID_altinstr_66" "_alt_end - " ".L" "__UNIQUE_ID_altinstr_66" "_alt_begin" ")-(" ".L" "__UNIQUE_ID_altinstr_66" "_pad - " ".L" "__UNIQUE_ID_altinstr_66" "_begin" ")),0x90\n" ".L" "__UNIQUE_ID_altinstr_66" "_end:\n" ".pushsection .altinstructions,\"a\"\n" " .long " ".L" "__UNIQUE_ID_altinstr_66" "_begin - .\n" " .long " ".L" "__UNIQUE_ID_altinstr_66" "_alt_begin - .\n" " .4byte " "( 3*32+21)" "\n" " .byte " ".L" "__UNIQUE_ID_altinstr_66" "_end - " ".L" "__UNIQUE_ID_altinstr_66" "_begin" "\n" " .byte " ".L" "__UNIQUE_ID_altinstr_66" "_alt_end - " ".L" "__UNIQUE_ID_altinstr_66" "_alt_begin" "\n" ".popsection\n" ".pushsection .altinstr_replacement, \"ax\"\n" "# ALT: replacement\n" ".L" "__UNIQUE_ID_altinstr_66" "_alt_begin:\n\t" "jmp %l[t_no]" "\n" ".L" "__UNIQUE_ID_altinstr_66" "_alt_end:\n" ".popsection\n" "\n" ".L" "__UNIQUE_ID_altinstr_67" "_pad:\n" "# ALT: padding\n" ".skip -(((" ".L" "__UNIQUE_ID_altinstr_67" "_alt_end - " ".L" "__UNIQUE_ID_altinstr_67" "_alt_begin" ")-(" ".L" "__UNIQUE_ID_altinstr_67" "_pad - " ".L" "__UNIQUE_ID_altinstr_67" "_begin" ")) > 0) * " "((" ".L" "__UNIQUE_ID_altinstr_67" "_alt_end - " ".L" "__UNIQUE_ID_altinstr_67" "_alt_begin" ")-(" ".L" "__UNIQUE_ID_altinstr_67" "_pad - " ".L" "__UNIQUE_ID_altinstr_67" "_begin" ")),0x90\n" ".L" "__UNIQUE_ID_altinstr_67" "_end:\n" ".pushsection .altinstructions,\"a\"\n" " .long " ".L" "__UNIQUE_ID_altinstr_67" "_begin - .\n" " .long " ".L" "__UNIQUE_ID_altinstr_67" "_alt_begin - .\n" " .4byte " "%c[feature]" "\n" " .byte " ".L" "__UNIQUE_ID_altinstr_67" "_end - " ".L"
"__UNIQUE_ID_altinstr_67" "_begin" "\n" " .byte " ".L" "__UNIQUE_ID_altinstr_67" "_alt_end - " ".L" "__UNIQUE_ID_altinstr_67" "_alt_begin" "\n" ".popsection\n" ".pushsection .altinstr_replacement, \"ax\"\n" "# ALT: replacement\n" ".L" "__UNIQUE_ID_altinstr_67" "_alt_begin:\n\t" "" "\n" ".L" "__UNIQUE_ID_altinstr_67" "_alt_end:\n" ".popsection\n"
".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
" testb %[bitnum], %a[cap_byte]\n"
" jnz %l[t_yes]\n"
" jmp %l[t_no]\n"
".popsection\n"
: : [feature] "i" (bit),
[bitnum] "i" (1 << (bit & 7)),
[cap_byte] "i" (&((const char *)boot_cpu_data.x86_capability)[bit >> 3])
: : t_yes, t_no);
t_yes:
return true;
t_no:
return false;
}
What I'm thinking is happening is that the __always_inline__ is causing
multiple exact copies of the thing.
Let me see how terrible it all ends up when using as macros
On Wed, Oct 01, 2025 at 08:43:39AM +0200, Peter Zijlstra wrote: > Let me see how terrible it all ends up when using as macros Argh, as macros are differently painful. I hate computers :/
On 10/1/25 00:23, Peter Zijlstra wrote: > On Wed, Oct 01, 2025 at 08:43:39AM +0200, Peter Zijlstra wrote: >> Let me see how terrible it all ends up when using as macros > Argh, as macros are differently painful. I hate computers :/ ALTERNATIVES are fun and all, but is there a good reason we're pulling out our hair to use them here? Normal WRMSR is slooooooooooow. The ones that aren't slow don't need WRMSRNS in the first place. Would an out-of-line wrmsr() with an if() in it be so bad? Or a static_call()? Having WRMSR be inlined is a laudable goal, but I'm really asking if it's worth it.
On October 3, 2025 7:23:29 AM PDT, Dave Hansen <dave.hansen@intel.com> wrote: >On 10/1/25 00:23, Peter Zijlstra wrote: >> On Wed, Oct 01, 2025 at 08:43:39AM +0200, Peter Zijlstra wrote: >>> Let me see how terrible it all ends up when using as macros >> Argh, as macros are differently painful. I hate computers :/ > >ALTERNATIVES are fun and all, but is there a good reason we're pulling >out our hair to use them here? > >Normal WRMSR is slooooooooooow. The ones that aren't slow don't need >WRMSRNS in the first place. > >Would an out-of-line wrmsr() with an if() in it be so bad? Or a >static_call()? Having WRMSR be inlined in a laudable goal, but I'm >really asking if it's worth it. We need them to use wrmsrns immediate.
© 2016 - 2025 Red Hat, Inc.