[PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR

Juergen Gross posted 16 patches 1 month, 1 week ago
[PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by Juergen Gross 1 month, 1 week ago
When available use one of the non-serializing WRMSR variants (WRMSRNS
with or without an immediate operand specifying the MSR register) in
__wrmsrq().

For the safe/unsafe variants make __wrmsrq() to be a common base
function instead of duplicating the ALTERNATIVE*() macros. This
requires to let native_wrmsr() use native_wrmsrq() instead of
__wrmsrq(). While changing this, convert native_wrmsr() into an inline
function.

Replace the only call of wsrmsrns() with the now equivalent call to
native_wrmsrq() and remove wsrmsrns().

The paravirt case will be handled later.

Originally-by: Xin Li (Intel) <xin@zytor.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- new patch, partially taken from "[RFC PATCH v2 21/34] x86/msr: Utilize
  the alternatives mechanism to write MSR" by Xin Li.
---
 arch/x86/include/asm/fred.h |   2 +-
 arch/x86/include/asm/msr.h  | 144 +++++++++++++++++++++++++++---------
 arch/x86/kvm/vmx/vmx.c      |   2 +-
 3 files changed, 111 insertions(+), 37 deletions(-)

diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 2bb65677c079..71fc0c6e4e32 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -101,7 +101,7 @@ static __always_inline void fred_update_rsp0(void)
 	unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
 
 	if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
-		wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
+		native_wrmsrq(MSR_IA32_FRED_RSP0, rsp0);
 		__this_cpu_write(fred_rsp0, rsp0);
 	}
 }
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 71f41af11591..ba11c3375cbd 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -7,11 +7,11 @@
 #ifndef __ASSEMBLER__
 
 #include <asm/asm.h>
-#include <asm/errno.h>
 #include <asm/cpumask.h>
 #include <uapi/asm/msr.h>
 #include <asm/shared/msr.h>
 
+#include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/percpu.h>
 
@@ -56,6 +56,36 @@ static inline void do_trace_read_msr(u32 msr, u64 val, int failed) {}
 static inline void do_trace_rdpmc(u32 msr, u64 val, int failed) {}
 #endif
 
+/* The GNU Assembler (Gas) with Binutils 2.40 adds WRMSRNS support */
+#if defined(CONFIG_AS_IS_GNU) && CONFIG_AS_VERSION >= 24000
+#define ASM_WRMSRNS		"wrmsrns\n\t"
+#else
+#define ASM_WRMSRNS		_ASM_BYTES(0x0f,0x01,0xc6)
+#endif
+
+/* The GNU Assembler (Gas) with Binutils 2.41 adds the .insn directive support */
+#if defined(CONFIG_AS_IS_GNU) && CONFIG_AS_VERSION >= 24100
+#define ASM_WRMSRNS_IMM			\
+	" .insn VEX.128.F3.M7.W0 0xf6 /0, %[val], %[msr]%{:u32}\n\t"
+#else
+/*
+ * Note, clang also doesn't support the .insn directive.
+ *
+ * The register operand is encoded as %rax because all uses of the immediate
+ * form MSR access instructions reference %rax as the register operand.
+ */
+#define ASM_WRMSRNS_IMM			\
+	" .byte 0xc4,0xe7,0x7a,0xf6,0xc0; .long %c[msr]"
+#endif
+
+#define PREPARE_RDX_FOR_WRMSR		\
+	"mov %%rax, %%rdx\n\t"		\
+	"shr $0x20, %%rdx\n\t"
+
+#define PREPARE_RCX_RDX_FOR_WRMSR	\
+	"mov %[msr], %%ecx\n\t"		\
+	PREPARE_RDX_FOR_WRMSR
+
 /*
  * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR
  * accessors and should not have any tracing or other functionality piggybacking
@@ -75,12 +105,76 @@ static __always_inline u64 __rdmsr(u32 msr)
 	return EAX_EDX_VAL(val, low, high);
 }
 
-static __always_inline void __wrmsrq(u32 msr, u64 val)
+static __always_inline bool __wrmsrq_variable(u32 msr, u64 val, int type)
 {
-	asm volatile("1: wrmsr\n"
-		     "2:\n"
-		     _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
-		     : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)) : "memory");
+#ifdef CONFIG_X86_64
+	BUILD_BUG_ON(__builtin_constant_p(msr));
+#endif
+
+	/*
+	 * WRMSR is 2 bytes.  WRMSRNS is 3 bytes.  Pad WRMSR with a redundant
+	 * DS prefix to avoid a trailing NOP.
+	 */
+	asm_inline volatile goto(
+		"1:\n"
+		ALTERNATIVE("ds wrmsr",
+			    ASM_WRMSRNS,
+			    X86_FEATURE_WRMSRNS)
+		_ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type])
+
+		:
+		: "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)), [type] "i" (type)
+		: "memory"
+		: badmsr);
+
+	return false;
+
+badmsr:
+	return true;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Non-serializing WRMSR or its immediate form, when available.
+ *
+ * Otherwise, it falls back to a serializing WRMSR.
+ */
+static __always_inline bool __wrmsrq_constant(u32 msr, u64 val, int type)
+{
+	BUILD_BUG_ON(!__builtin_constant_p(msr));
+
+	asm_inline volatile goto(
+		"1:\n"
+		ALTERNATIVE_2(PREPARE_RCX_RDX_FOR_WRMSR
+			      "2: ds wrmsr",
+			      PREPARE_RCX_RDX_FOR_WRMSR
+			      ASM_WRMSRNS,
+			      X86_FEATURE_WRMSRNS,
+			      ASM_WRMSRNS_IMM,
+			      X86_FEATURE_MSR_IMM)
+		_ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type])	/* For WRMSRNS immediate */
+		_ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type])	/* For WRMSR(NS) */
+
+		:
+		: [val] "a" (val), [msr] "i" (msr), [type] "i" (type)
+		: "memory", "ecx", "rdx"
+		: badmsr);
+
+	return false;
+
+badmsr:
+	return true;
+}
+#endif
+
+static __always_inline bool __wrmsrq(u32 msr, u64 val, int type)
+{
+#ifdef CONFIG_X86_64
+	if (__builtin_constant_p(msr))
+		return __wrmsrq_constant(msr, val, type);
+#endif
+
+	return __wrmsrq_variable(msr, val, type);
 }
 
 #define native_rdmsr(msr, val1, val2)			\
@@ -95,11 +189,15 @@ static __always_inline u64 native_rdmsrq(u32 msr)
 	return __rdmsr(msr);
 }
 
-#define native_wrmsr(msr, low, high)			\
-	__wrmsrq((msr), (u64)(high) << 32 | (low))
+static __always_inline void native_wrmsrq(u32 msr, u64 val)
+{
+	__wrmsrq(msr, val, EX_TYPE_WRMSR);
+}
 
-#define native_wrmsrq(msr, val)				\
-	__wrmsrq((msr), (val))
+static __always_inline void native_wrmsr(u32 msr, u32 low, u32 high)
+{
+	native_wrmsrq(msr, (u64)high << 32 | low);
+}
 
 static inline u64 native_read_msr(u32 msr)
 {
@@ -131,15 +229,7 @@ static inline void notrace native_write_msr(u32 msr, u64 val)
 /* Can be uninlined because referenced by paravirt */
 static inline int notrace native_write_msr_safe(u32 msr, u64 val)
 {
-	int err;
-
-	asm volatile("1: wrmsr ; xor %[err],%[err]\n"
-		     "2:\n\t"
-		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err])
-		     : [err] "=a" (err)
-		     : "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32))
-		     : "memory");
-	return err;
+	return __wrmsrq(msr, val, EX_TYPE_WRMSR_SAFE) ? -EIO : 0;
 }
 
 extern int rdmsr_safe_regs(u32 regs[8]);
@@ -158,7 +248,6 @@ static inline u64 native_read_pmc(int counter)
 #ifdef CONFIG_PARAVIRT_XXL
 #include <asm/paravirt.h>
 #else
-#include <linux/errno.h>
 static __always_inline u64 read_msr(u32 msr)
 {
 	return native_read_msr(msr);
@@ -250,21 +339,6 @@ static inline int wrmsrq_safe(u32 msr, u64 val)
 	return err;
 }
 
-/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
-#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
-
-/* Non-serializing WRMSR, when available.  Falls back to a serializing WRMSR. */
-static __always_inline void wrmsrns(u32 msr, u64 val)
-{
-	/*
-	 * WRMSR is 2 bytes.  WRMSRNS is 3 bytes.  Pad WRMSR with a redundant
-	 * DS prefix to avoid a trailing NOP.
-	 */
-	asm volatile("1: " ALTERNATIVE("ds wrmsr", ASM_WRMSRNS, X86_FEATURE_WRMSRNS)
-		     "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
-		     : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
-}
-
 static inline void wrmsr(u32 msr, u32 low, u32 high)
 {
 	wrmsrq(msr, (u64)high << 32 | low);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3799cbbb4577..e29a2ac24669 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1473,7 +1473,7 @@ static void vmx_write_guest_host_msr(struct vcpu_vmx *vmx, u32 msr, u64 data,
 {
 	preempt_disable();
 	if (vmx->vt.guest_state_loaded)
-		wrmsrns(msr, data);
+		native_wrmsrq(msr, data);
 	preempt_enable();
 	*cache = data;
 }
-- 
2.53.0
Re: [PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by Sean Christopherson 1 month, 1 week ago
On Wed, Feb 18, 2026, Juergen Gross wrote:
> When available use one of the non-serializing WRMSR variants (WRMSRNS
> with or without an immediate operand specifying the MSR register) in
> __wrmsrq().

Silently using a non-serializing version (or not) seems dangerous (not for KVM,
but for the kernel at-large), unless the rule is going to be that MSR writes need
to be treated as non-serializing by default.  Which I'm fine with, but if we go
that route, then I'd prefer not to special case non-serializing callers.

E.g. in the KVM code, I find the use of wrmsrns() intuitive, because KVM doesn't
need the WRMSR to be serializing and so can eke out a bit of extra performance by
using wrmsrns() instead of wrmsrq().  But with native_wrmsrq(), it's not clear
why _this_ particular WRMSR in KVM needs to use the "native" version.

There are a pile of other WRMSRs in KVM that are in hot paths, especially with
the mediated PMU support.  If we're going to make the default version non-serializing,
then I'd prefer to get that via wrmsrq(), i.e. reap the benefits for all of KVM,
not just one arbitrary path.

> For the safe/unsafe variants make __wrmsrq() to be a common base
> function instead of duplicating the ALTERNATIVE*() macros. This
> requires to let native_wrmsr() use native_wrmsrq() instead of
> __wrmsrq(). While changing this, convert native_wrmsr() into an inline
> function.
> 
> Replace the only call of wsrmsrns() with the now equivalent call to
> native_wrmsrq() and remove wsrmsrns().

...

> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 3799cbbb4577..e29a2ac24669 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1473,7 +1473,7 @@ static void vmx_write_guest_host_msr(struct vcpu_vmx *vmx, u32 msr, u64 data,
>  {
>  	preempt_disable();
>  	if (vmx->vt.guest_state_loaded)
> -		wrmsrns(msr, data);
> +		native_wrmsrq(msr, data);
>  	preempt_enable();
>  	*cache = data;
>  }
> -- 
> 2.53.0
>
Re: [PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by Dave Hansen 1 month, 1 week ago
On 2/18/26 13:00, Sean Christopherson wrote:
> On Wed, Feb 18, 2026, Juergen Gross wrote:
>> When available use one of the non-serializing WRMSR variants (WRMSRNS
>> with or without an immediate operand specifying the MSR register) in
>> __wrmsrq().
> Silently using a non-serializing version (or not) seems dangerous (not for KVM,
> but for the kernel at-large), unless the rule is going to be that MSR writes need
> to be treated as non-serializing by default.

Yeah, there's no way we can do this in general. It'll work for 99% of
the MSRs on 99% of the systems for a long time. Then the one new system
with WRMSRNS is going to have one hell of a heisenbug that'll take years
off some poor schmuck's life.

We should really encourage *new* code to use wrmsrns() when it can at
least for annotation that it doesn't need serialization. But I don't
think we should do anything to old, working code.
Re: [PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by Jürgen Groß 1 month, 1 week ago
On 18.02.26 22:37, Dave Hansen wrote:
> On 2/18/26 13:00, Sean Christopherson wrote:
>> On Wed, Feb 18, 2026, Juergen Gross wrote:
>>> When available use one of the non-serializing WRMSR variants (WRMSRNS
>>> with or without an immediate operand specifying the MSR register) in
>>> __wrmsrq().
>> Silently using a non-serializing version (or not) seems dangerous (not for KVM,
>> but for the kernel at-large), unless the rule is going to be that MSR writes need
>> to be treated as non-serializing by default.
> 
> Yeah, there's no way we can do this in general. It'll work for 99% of
> the MSRs on 99% of the systems for a long time. Then the one new system
> with WRMSRNS is going to have one hell of a heisenbug that'll take years
> off some poor schmuck's life.

I _really_ thought this was discussed upfront by Xin before he sent out his
first version of the series.

Sorry for not making it more clear in the header message.


Juergen
Re: [PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by Xin Li 1 month, 1 week ago
> On Feb 18, 2026, at 10:44 PM, Jürgen Groß <jgross@suse.com> wrote:
> 
> On 18.02.26 22:37, Dave Hansen wrote:
>> On 2/18/26 13:00, Sean Christopherson wrote:
>>> On Wed, Feb 18, 2026, Juergen Gross wrote:
>>>> When available use one of the non-serializing WRMSR variants (WRMSRNS
>>>> with or without an immediate operand specifying the MSR register) in
>>>> __wrmsrq().
>>> Silently using a non-serializing version (or not) seems dangerous (not for KVM,
>>> but for the kernel at-large), unless the rule is going to be that MSR writes need
>>> to be treated as non-serializing by default.
>> Yeah, there's no way we can do this in general. It'll work for 99% of
>> the MSRs on 99% of the systems for a long time. Then the one new system
>> with WRMSRNS is going to have one hell of a heisenbug that'll take years
>> off some poor schmuck's life.
> 
> I _really_ thought this was discussed upfront by Xin before he sent out his
> first version of the series.

I actually reached out to the Intel architects about this before I started
coding. Turns out, if the CPU supports WRMSRNS, you can use it across the
board.  The hardware is smart enough to perform a serialized write whenever
a non-serialized one isn't proper, so there’s no risk.
Re: [PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by Dave Hansen 1 month, 1 week ago
On 2/20/26 09:12, Xin Li wrote:
>> I _really_ thought this was discussed upfront by Xin before he sent out his
>> first version of the series.
> I actually reached out to the Intel architects about this before I started
> coding. Turns out, if the CPU supports WRMSRNS, you can use it across the
> board.  The hardware is smart enough to perform a serialized write whenever
> a non-serialized one isn't proper, so there’s no risk.

Could we be a little more specific here, please?

If it was universally safe to s/WRMSR/WRMSRNS/, then there wouldn't have
been a need for WRMSRNS in the ISA.

Even the WRMSRNS description in the SDM talks about some caveats with
"performance-monitor events" MSRs. That sounds like it contradicts the
idea that the "hardware is smart enough" universally to tolerate using
WRMSRNS *EVERYWHERE*.

It also says:

	Like WRMSR, WRMSRNS will ensure that all operations before it do
	not use the new MSR value and that all operations after the
	WRMSRNS do use the new value.

Which is a handy guarantee for sure. But, it's far short of a fully
serializing instruction.
Re: [PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by Xin Li 1 month, 1 week ago
>>> I _really_ thought this was discussed upfront by Xin before he sent out his
>>> first version of the series.
>> I actually reached out to the Intel architects about this before I started
>> coding. Turns out, if the CPU supports WRMSRNS, you can use it across the
>> board.  The hardware is smart enough to perform a serialized write whenever
>> a non-serialized one isn't proper, so there’s no risk.
> 
> Could we be a little more specific here, please?

Sorry as I’m no longer with Intel, I don’t have access to those emails.

Got to mention, also to reply to Sean’s challenge, as usual I didn’t get
detailed explanation about how would hardware implement WRMSRNS,
except it falls back to do a serialized write when it’s not *proper*.


> 
> If it was universally safe to s/WRMSR/WRMSRNS/, then there wouldn't have
> been a need for WRMSRNS in the ISA.
> 
> Even the WRMSRNS description in the SDM talks about some caveats with
> "performance-monitor events" MSRs. That sounds like it contradicts the
> idea that the "hardware is smart enough" universally to tolerate using
> WRMSRNS *EVERYWHERE*.
> 
> It also says:
> 
> Like WRMSR, WRMSRNS will ensure that all operations before it do
> not use the new MSR value and that all operations after the
> WRMSRNS do use the new value.
> 
> Which is a handy guarantee for sure. But, it's far short of a fully
> serializing instruction.
Re: [PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by H. Peter Anvin 1 month, 1 week ago
On February 23, 2026 9:56:28 AM PST, Xin Li <xin@zytor.com> wrote:
>
>>>> I _really_ thought this was discussed upfront by Xin before he sent out his
>>>> first version of the series.
>>> I actually reached out to the Intel architects about this before I started
>>> coding. Turns out, if the CPU supports WRMSRNS, you can use it across the
>>> board.  The hardware is smart enough to perform a serialized write whenever
>>> a non-serialized one isn't proper, so there’s no risk.
>> 
>> Could we be a little more specific here, please?
>
>Sorry as I’m no longer with Intel, I don’t have access to those emails.
>
>Got to mention, also to reply to Sean’s challenge, as usual I didn’t get
>detailed explanation about how would hardware implement WRMSRNS,
>except it falls back to do a serialized write when it’s not *proper*.
>
>
>> 
>> If it was universally safe to s/WRMSR/WRMSRNS/, then there wouldn't have
>> been a need for WRMSRNS in the ISA.
>> 
>> Even the WRMSRNS description in the SDM talks about some caveats with
>> "performance-monitor events" MSRs. That sounds like it contradicts the
>> idea that the "hardware is smart enough" universally to tolerate using
>> WRMSRNS *EVERYWHERE*.
>> 
>> It also says:
>> 
>> Like WRMSR, WRMSRNS will ensure that all operations before it do
>> not use the new MSR value and that all operations after the
>> WRMSRNS do use the new value.
>> 
>> Which is a handy guarantee for sure. But, it's far short of a fully
>> serializing instruction.
>
>

So to get a little bit of clarity here as to the architectural contract as opposed to the current implementations:

1. WRNSRNS is indeed intended as an opt-in, as opposed to declaring random registers non-serializing a posteori by sheer necessity in technical violation of the ISA.

We should not blindly replace all WRMSRs with WRMSRNS. We should, however, make wrmsrns() fall back to WRMSR on hardware which does not support it, so we can unconditionally replace it at call sites. Many, probably most, would be possible to replace, but for those that make no difference performance-wise there is really no reason to worry about the testing. 

It is also quite likely we will find cases where we need *one* serialization after writing to a whole group of MSRs. In that case, we may want to add a sync_cpu_after_wrmsrns() [or something like that] which does a sync_cpu() if and only if WRMSRNS is supported.

I don't know if there will ever be any CPUs which support WRMSRNS but not SERIALIZE, so it might be entirely reasonable to have WRMSRNS depend on SERIALIZE and not bother with the IRET fallback variation.

2. WRMSRNS *may* perform a fully serializing write if the hardware implementation does not support a faster write method for a certain MSR. This is particularly likely for MSRs that have system-wide consequences, but it is also a legitimate option for the hardware implementation for MSRs that are not expected to have any kind of performance impact (full serialization is a very easy way to ensure full consistency and so reduces implementation and verification burden.)

3. All registers, including MSRs, in x86 are subject to scoreboarding, meaning that so-called "psychic effects" (a direct effect being observable before the cause) or use of stale resources are never permitted. This does *not* imply that events cannot be observed out of order, and cross-CPU visibility has its own rules, but that is not relevant for most registers.

4. WRMSRNS immediate can be reasonably expected to be significantly faster than even WRMSRNS ecx (at least for MSRs deemed valuable to optimize), because the MSR number is available to the hardware at the very beginning of the instruction pipeline. To take proper advantage of that, it is desirable to avoid calling wrmsrns() with a non-constant value in code paths where performance matters, even if it bloats the code somewhat. The main case which I can think about that might actually matter is context-switching with perf enabled (also a good example for wanting to SERIALIZE or at least MFENCE or LFENCE after the batch write if they will have effects before returning to user space.) There is also of course the option of dynamically generating a code snippet if the list of MSRs is too dynamic.
Re: [PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by Sean Christopherson 1 month, 1 week ago
On Fri, Feb 20, 2026, Xin Li wrote:
> 
> > On Feb 18, 2026, at 10:44 PM, Jürgen Groß <jgross@suse.com> wrote:
> > 
> > On 18.02.26 22:37, Dave Hansen wrote:
> >> On 2/18/26 13:00, Sean Christopherson wrote:
> >>> On Wed, Feb 18, 2026, Juergen Gross wrote:
> >>>> When available use one of the non-serializing WRMSR variants (WRMSRNS
> >>>> with or without an immediate operand specifying the MSR register) in
> >>>> __wrmsrq().
> >>> Silently using a non-serializing version (or not) seems dangerous (not for KVM,
> >>> but for the kernel at-large), unless the rule is going to be that MSR writes need
> >>> to be treated as non-serializing by default.
> >> Yeah, there's no way we can do this in general. It'll work for 99% of
> >> the MSRs on 99% of the systems for a long time. Then the one new system
> >> with WRMSRNS is going to have one hell of a heisenbug that'll take years
> >> off some poor schmuck's life.
> > 
> > I _really_ thought this was discussed upfront by Xin before he sent out his
> > first version of the series.
> 
> I actually reached out to the Intel architects about this before I started
> coding. Turns out, if the CPU supports WRMSRNS, you can use it across the
> board.  The hardware is smart enough to perform a serialized write whenever
> a non-serialized one isn't proper, so there’s no risk.

How can hardware possibly know what's "proper"?  E.g. I don't see how hardware
can reason about safety if there's a software sequence that is subtly relying on
the serialization of WRMSR to provide some form of ordering.

And if that's the _architectural_ behavior, then what's the point of WRMSRNS?
If it's not architectural, then I don't see how the kernel can rely on it. 
Re: [PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by H. Peter Anvin 1 month, 1 week ago
On February 18, 2026 1:37:42 PM PST, Dave Hansen <dave.hansen@intel.com> wrote:
>On 2/18/26 13:00, Sean Christopherson wrote:
>> On Wed, Feb 18, 2026, Juergen Gross wrote:
>>> When available use one of the non-serializing WRMSR variants (WRMSRNS
>>> with or without an immediate operand specifying the MSR register) in
>>> __wrmsrq().
>> Silently using a non-serializing version (or not) seems dangerous (not for KVM,
>> but for the kernel at-large), unless the rule is going to be that MSR writes need
>> to be treated as non-serializing by default.
>
>Yeah, there's no way we can do this in general. It'll work for 99% of
>the MSRs on 99% of the systems for a long time. Then the one new system
>with WRMSRNS is going to have one hell of a heisenbug that'll take years
>off some poor schmuck's life.
>
>We should really encourage *new* code to use wrmsrns() when it can at
>least for annotation that it doesn't need serialization. But I don't
>think we should do anything to old, working code.

Correct. We need to do this on a user by user basis.
Re: [PATCH v3 09/16] x86/msr: Use the alternatives mechanism for WRMSR
Posted by Jürgen Groß 1 month, 1 week ago
On 19.02.26 00:36, H. Peter Anvin wrote:
> On February 18, 2026 1:37:42 PM PST, Dave Hansen <dave.hansen@intel.com> wrote:
>> On 2/18/26 13:00, Sean Christopherson wrote:
>>> On Wed, Feb 18, 2026, Juergen Gross wrote:
>>>> When available use one of the non-serializing WRMSR variants (WRMSRNS
>>>> with or without an immediate operand specifying the MSR register) in
>>>> __wrmsrq().
>>> Silently using a non-serializing version (or not) seems dangerous (not for KVM,
>>> but for the kernel at-large), unless the rule is going to be that MSR writes need
>>> to be treated as non-serializing by default.
>>
>> Yeah, there's no way we can do this in general. It'll work for 99% of
>> the MSRs on 99% of the systems for a long time. Then the one new system
>> with WRMSRNS is going to have one hell of a heisenbug that'll take years
>> off some poor schmuck's life.
>>
>> We should really encourage *new* code to use wrmsrns() when it can at
>> least for annotation that it doesn't need serialization. But I don't
>> think we should do anything to old, working code.
> 
> Correct. We need to do this on a user by user basis.

Then I'd prefer to introduce a new wrmsr_sync() function for the serializing
variant and to switch all current users which are not known to tolerate the
non-serializing form to it. The main advantage of that approach would be to
be able to use the immediate form where possible automatically.


Juergen