Mirroring the cleanup to rdmsr(), do the same to wrmsr(). It now has the same
API as wrmsrl(), but we'll want to drop that wrapper in due course.
It's telling that almost all remaining users pass in 0. Most are converted
directly to WRMSRNS, but a few are not.
MSR_VIRT_SPEC_CTRL is unconditionally intercepted, which is orders of magnitude
more expensive than just serialising. In disable_lapic_nmi_watchdog(), the P4 case
won't run on hardware which has anything more than plain WRMSR.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
xen/arch/x86/cpu/amd.c | 2 +-
xen/arch/x86/hvm/vmx/vmcs.c | 2 +-
xen/arch/x86/include/asm/msr.h | 20 ++++++++++----------
xen/arch/x86/nmi.c | 18 +++++++++---------
xen/arch/x86/oprofile/op_model_athlon.c | 2 +-
5 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
index 769413e96a3f..e03fba935510 100644
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -934,7 +934,7 @@ void amd_set_legacy_ssbd(bool enable)
return;
if (cpu_has_virt_ssbd)
- wrmsr(MSR_VIRT_SPEC_CTRL, enable ? SPEC_CTRL_SSBD : 0, 0);
+ wrmsr(MSR_VIRT_SPEC_CTRL, enable ? SPEC_CTRL_SSBD : 0);
else if (amd_legacy_ssbd)
core_set_legacy_ssbd(enable);
else
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 1fa61a944b23..328cba64387d 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -754,7 +754,7 @@ static int _vmx_cpu_up(bool bsp)
eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
if ( test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) )
eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX;
- wrmsr(MSR_IA32_FEATURE_CONTROL, eax, 0);
+ wrmsrns(MSR_IA32_FEATURE_CONTROL, eax);
}
if ( (rc = vmx_init_vmcs_config(bsp)) != 0 )
diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
index f1b2bd5adc9f..1bd27b989a4d 100644
--- a/xen/arch/x86/include/asm/msr.h
+++ b/xen/arch/x86/include/asm/msr.h
@@ -15,13 +15,17 @@
* uint64_t foo = rdmsr(MSR_BAR);
* wrmsrns(MSR_BAR, foo);
*
+ * and, if architectural serialisation is necessary, or there are other
+ * reasons that WRMSRNS is inapplicable, then:
+ *
+ * wrmsr(MSR_BAR, foo);
+ *
* In addition, *_safe() wrappers exist to cope gracefully with a #GP.
*
*
* All legacy forms are to be phased out:
*
* rdmsrl(MSR_FOO, val);
- * wrmsr(MSR_FOO, lo, hi);
* wrmsrl(MSR_FOO, val);
*/
@@ -43,17 +47,13 @@ static inline uint64_t rdmsr(unsigned int msr)
val = a__ | ((u64)b__<<32); \
} while(0)
-#define wrmsr(msr,val1,val2) \
- __asm__ __volatile__("wrmsr" \
- : /* no outputs */ \
- : "c" (msr), "a" (val1), "d" (val2))
-
-static inline void wrmsrl(unsigned int msr, uint64_t val)
+static inline void wrmsr(unsigned int msr, uint64_t val)
{
- uint32_t lo = val, hi = val >> 32;
+ uint32_t lo = val, hi = val >> 32;
- wrmsr(msr, lo, hi);
+ asm volatile ( "wrmsr" :: "a" (lo), "d" (hi), "c" (msr) );
}
+#define wrmsrl(msr, val) wrmsr(msr, val)
/* Non-serialising WRMSR, when available. Falls back to a serialising WRMSR. */
static inline void wrmsrns(uint32_t msr, uint64_t val)
@@ -151,7 +151,7 @@ static inline void wrmsr_tsc_aux(uint32_t val)
if ( *this_tsc_aux != val )
{
- wrmsr(MSR_TSC_AUX, val, 0);
+ wrmsrns(MSR_TSC_AUX, val);
*this_tsc_aux = val;
}
}
diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
index 9793fa23168d..0d4aaa5a0b57 100644
--- a/xen/arch/x86/nmi.c
+++ b/xen/arch/x86/nmi.c
@@ -218,16 +218,16 @@ void disable_lapic_nmi_watchdog(void)
return;
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
- wrmsr(MSR_K7_EVNTSEL0, 0, 0);
+ wrmsrns(MSR_K7_EVNTSEL0, 0);
break;
case X86_VENDOR_INTEL:
switch (boot_cpu_data.x86) {
case 6:
- wrmsr(MSR_P6_EVNTSEL(0), 0, 0);
+ wrmsrns(MSR_P6_EVNTSEL(0), 0);
break;
case 15:
- wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
- wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+ wrmsr(MSR_P4_IQ_CCCR0, 0);
+ wrmsr(MSR_P4_CRU_ESCR0, 0);
break;
}
break;
@@ -282,7 +282,7 @@ static void clear_msr_range(unsigned int base, unsigned int n)
unsigned int i;
for (i = 0; i < n; i++)
- wrmsr(base+i, 0, 0);
+ wrmsrns(base + i, 0);
}
static inline void write_watchdog_counter(const char *descr)
@@ -308,11 +308,11 @@ static void setup_k7_watchdog(void)
| K7_EVNTSEL_USR
| K7_NMI_EVENT;
- wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+ wrmsr(MSR_K7_EVNTSEL0, evntsel);
write_watchdog_counter("K7_PERFCTR0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= K7_EVNTSEL_ENABLE;
- wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+ wrmsr(MSR_K7_EVNTSEL0, evntsel);
}
static void setup_p6_watchdog(unsigned counter)
@@ -338,11 +338,11 @@ static void setup_p6_watchdog(unsigned counter)
| P6_EVNTSEL_USR
| counter;
- wrmsr(MSR_P6_EVNTSEL(0), evntsel, 0);
+ wrmsrns(MSR_P6_EVNTSEL(0), evntsel);
write_watchdog_counter("P6_PERFCTR0");
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= P6_EVNTSEL0_ENABLE;
- wrmsr(MSR_P6_EVNTSEL(0), evntsel, 0);
+ wrmsrns(MSR_P6_EVNTSEL(0), evntsel);
}
static void setup_p4_watchdog(void)
diff --git a/xen/arch/x86/oprofile/op_model_athlon.c b/xen/arch/x86/oprofile/op_model_athlon.c
index bf897a4b6328..fd454b04c353 100644
--- a/xen/arch/x86/oprofile/op_model_athlon.c
+++ b/xen/arch/x86/oprofile/op_model_athlon.c
@@ -34,7 +34,7 @@
#define MAX_COUNTERS FAM15H_NUM_COUNTERS
#define CTR_READ(msr_content,msrs,c) do {rdmsrl(msrs->counters[(c)].addr, (msr_content));} while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
+#define CTR_WRITE(l,msrs,c) do { wrmsr(msrs->counters[(c)].addr, -l); } while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<31)))
#define CTRL_READ(msr_content,msrs,c) do {rdmsrl(msrs->controls[(c)].addr, (msr_content));} while (0)
--
2.39.5
On 15.08.2025 22:41, Andrew Cooper wrote:
> --- a/xen/arch/x86/nmi.c
> +++ b/xen/arch/x86/nmi.c
> @@ -218,16 +218,16 @@ void disable_lapic_nmi_watchdog(void)
> return;
> switch (boot_cpu_data.x86_vendor) {
> case X86_VENDOR_AMD:
> - wrmsr(MSR_K7_EVNTSEL0, 0, 0);
> + wrmsrns(MSR_K7_EVNTSEL0, 0);
Since you switch to non-serializing here, ...
> @@ -308,11 +308,11 @@ static void setup_k7_watchdog(void)
> | K7_EVNTSEL_USR
> | K7_NMI_EVENT;
>
> - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
> + wrmsr(MSR_K7_EVNTSEL0, evntsel);
> write_watchdog_counter("K7_PERFCTR0");
> apic_write(APIC_LVTPC, APIC_DM_NMI);
> evntsel |= K7_EVNTSEL_ENABLE;
> - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
> + wrmsr(MSR_K7_EVNTSEL0, evntsel);
> }
... why not also here?
> --- a/xen/arch/x86/oprofile/op_model_athlon.c
> +++ b/xen/arch/x86/oprofile/op_model_athlon.c
> @@ -34,7 +34,7 @@
> #define MAX_COUNTERS FAM15H_NUM_COUNTERS
>
> #define CTR_READ(msr_content,msrs,c) do {rdmsrl(msrs->counters[(c)].addr, (msr_content));} while (0)
> -#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
> +#define CTR_WRITE(l,msrs,c) do { wrmsr(msrs->counters[(c)].addr, -l); } while (0)
This isn't obviously correct (as in: no functional change): The macro is,
for example, passed reset_value[] contents, which is of type unsigned long.
Quite possible that the original code was wrong, though.
In any event l wants parenthesizing.
Jan
On 19/08/2025 1:38 pm, Jan Beulich wrote:
> On 15.08.2025 22:41, Andrew Cooper wrote:
>> --- a/xen/arch/x86/nmi.c
>> +++ b/xen/arch/x86/nmi.c
>> @@ -218,16 +218,16 @@ void disable_lapic_nmi_watchdog(void)
>> return;
>> switch (boot_cpu_data.x86_vendor) {
>> case X86_VENDOR_AMD:
>> - wrmsr(MSR_K7_EVNTSEL0, 0, 0);
>> + wrmsrns(MSR_K7_EVNTSEL0, 0);
> Since you switch to non-serializing here, ...
>
>> @@ -308,11 +308,11 @@ static void setup_k7_watchdog(void)
>> | K7_EVNTSEL_USR
>> | K7_NMI_EVENT;
>>
>> - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
>> + wrmsr(MSR_K7_EVNTSEL0, evntsel);
>> write_watchdog_counter("K7_PERFCTR0");
>> apic_write(APIC_LVTPC, APIC_DM_NMI);
>> evntsel |= K7_EVNTSEL_ENABLE;
>> - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
>> + wrmsr(MSR_K7_EVNTSEL0, evntsel);
>> }
> ... why not also here?
An oversight. Fixed.
>
>> --- a/xen/arch/x86/oprofile/op_model_athlon.c
>> +++ b/xen/arch/x86/oprofile/op_model_athlon.c
>> @@ -34,7 +34,7 @@
>> #define MAX_COUNTERS FAM15H_NUM_COUNTERS
>>
>> #define CTR_READ(msr_content,msrs,c) do {rdmsrl(msrs->counters[(c)].addr, (msr_content));} while (0)
>> -#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
>> +#define CTR_WRITE(l,msrs,c) do { wrmsr(msrs->counters[(c)].addr, -l); } while (0)
> This isn't obviously correct (as in: no functional change): The macro is,
> for example, passed reset_value[] contents, which is of type unsigned long.
> Quite possible that the original code was wrong, though.
I'm pretty sure the change is correct.
Perf counters get programmed to -(count), as they generate an interrupt
when they overflow. The K8 is the oldest BKDG I can easily access, and
the counters are 48 bits wide. The same is true of Intel systems of
the same age.
Interestingly, it is the singular omission from b5103d692aa7, which
converts everything in this area, including the Intel version of
CTR_WRITE(), to use wrmsrl().
While looking at the mail list archives for b5103d692aa7, I found
https://lists.xenproject.org/archives/html/xen-devel/2010-06/msg01660.html
which shows that it was Christoph's attempt to turn rdmsr() and wrmsr()
into real C functions, so I'm pretty certain that CTR_WRITE() was an
omission in b5103d692aa7.
Only 15 years late on that todo...
> In any event l wants parenthesizing.
Oh, so it does. Fixed.
~Andrew
On 21.08.2025 20:47, Andrew Cooper wrote:
> On 19/08/2025 1:38 pm, Jan Beulich wrote:
>> On 15.08.2025 22:41, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/nmi.c
>>> +++ b/xen/arch/x86/nmi.c
>>> @@ -218,16 +218,16 @@ void disable_lapic_nmi_watchdog(void)
>>> return;
>>> switch (boot_cpu_data.x86_vendor) {
>>> case X86_VENDOR_AMD:
>>> - wrmsr(MSR_K7_EVNTSEL0, 0, 0);
>>> + wrmsrns(MSR_K7_EVNTSEL0, 0);
>> Since you switch to non-serializing here, ...
>>
>>> @@ -308,11 +308,11 @@ static void setup_k7_watchdog(void)
>>> | K7_EVNTSEL_USR
>>> | K7_NMI_EVENT;
>>>
>>> - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
>>> + wrmsr(MSR_K7_EVNTSEL0, evntsel);
>>> write_watchdog_counter("K7_PERFCTR0");
>>> apic_write(APIC_LVTPC, APIC_DM_NMI);
>>> evntsel |= K7_EVNTSEL_ENABLE;
>>> - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
>>> + wrmsr(MSR_K7_EVNTSEL0, evntsel);
>>> }
>> ... why not also here?
>
> An oversight. Fixed.
>
>>
>>> --- a/xen/arch/x86/oprofile/op_model_athlon.c
>>> +++ b/xen/arch/x86/oprofile/op_model_athlon.c
>>> @@ -34,7 +34,7 @@
>>> #define MAX_COUNTERS FAM15H_NUM_COUNTERS
>>>
>>> #define CTR_READ(msr_content,msrs,c) do {rdmsrl(msrs->counters[(c)].addr, (msr_content));} while (0)
>>> -#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
>>> +#define CTR_WRITE(l,msrs,c) do { wrmsr(msrs->counters[(c)].addr, -l); } while (0)
>> This isn't obviously correct (as in: no functional change): The macro is,
>> for example, passed reset_value[] contents, which is of type unsigned long.
>> Quite possible that the original code was wrong, though.
>
> I'm pretty sure the change is correct.
>
> Perf counters get programmed to -(count), as they generate an interrupt
> when they overflow. The K8 is the oldest BKDG I can easily access, and
> the counters are 48 bits wide. The same is true of Intel systems of of
> the same age.
>
> Interestingly, it is the singular omission from b5103d692aa7 which
> converts everything including the Intel version of CTR_WRITE() of this
> to use wrmsrl().
>
> While looking at the mail list archives for b5103d692aa7, I found
> https://lists.xenproject.org/archives/html/xen-devel/2010-06/msg01660.html
> which shows that it was Christoph's attempt to turn rdmsr() and wrmsr()
> into a real C functions, so I'm pretty certain that CTR_WRITE() was an
> omission in b5103d692aa7.
Okay, so mainly in need of having the description point out there is actually
a correction of something in here. Then:
Acked-by: Jan Beulich <jbeulich@suse.com>
Jan
© 2016 - 2025 Red Hat, Inc.