Some versions of Windows hang on reboot if their TSC value is greater
than 2^54. The calibration of the Hyper-V reference time overflows
and fails; as a result the processors' clock sources are out of sync.
The issue is that the TSC _should_ be reset to 0 on CPU reset and
QEMU tries to do that. However, KVM special cases writing 0 to the
TSC and thinks that QEMU is trying to hot-plug a CPU, which is
correct the first time through but not later. Thwart this valiant
effort and reset the TSC to 1 instead, but only if the CPU has been
run once.
For this to work, env->tsc has to be moved to the part of CPUArchState
that is not zeroed at the beginning of x86_cpu_reset.
Reported-by: Vadim Rozenfeld <vrozenfe@redhat.com>
Supersedes: <20220324082346.72180-1-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 13 +++++++++++++
target/i386/cpu.h | 2 +-
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index ec3b50bf6e..cb6b5467d0 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5931,6 +5931,19 @@ static void x86_cpu_reset(DeviceState *dev)
env->xstate_bv = 0;
env->pat = 0x0007040600070406ULL;
+
+ if (kvm_enabled()) {
+ /*
+ * KVM handles TSC = 0 specially and thinks we are hot-plugging
+ * a new CPU, use 1 instead to force a reset.
+ */
+ if (env->tsc != 0) {
+ env->tsc = 1;
+ }
+ } else {
+ env->tsc = 0;
+ }
+
env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT;
if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) {
env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_MWAIT;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e31e6bd8b8..982c532353 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1554,7 +1554,6 @@ typedef struct CPUArchState {
target_ulong kernelgsbase;
#endif
- uint64_t tsc;
uint64_t tsc_adjust;
uint64_t tsc_deadline;
uint64_t tsc_aux;
@@ -1708,6 +1707,7 @@ typedef struct CPUArchState {
int64_t tsc_khz;
int64_t user_tsc_khz; /* for sanity check only */
uint64_t apic_bus_freq;
+ uint64_t tsc;
#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
void *xsave_buf;
uint32_t xsave_buf_len;
--
2.35.1
On Thu, Mar 24, 2022 at 06:31:36PM +0100, Paolo Bonzini wrote: > Some versions of Windows hang on reboot if their TSC value is greater > than 2^54. The calibration of the Hyper-V reference time overflows > and fails; as a result the processors' clock sources are out of sync. > > The issue is that the TSC _should_ be reset to 0 on CPU reset and > QEMU tries to do that. However, KVM special cases writing 0 to the > TSC and thinks that QEMU is trying to hot-plug a CPU, which is > correct the first time through but not later. Thwart this valiant > effort and reset the TSC to 1 instead, but only if the CPU has been > run once. > > For this to work, env->tsc has to be moved to the part of CPUArchState > that is not zeroed at the beginning of x86_cpu_reset. > > Reported-by: Vadim Rozenfeld <vrozenfe@redhat.com> > Supersedes: <20220324082346.72180-1-pbonzini@redhat.com> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Paolo, Won't this disable the logic to sync TSCs, making it possible for TSC of SMP guests to go out of sync? (And remember the logic to sync TSCs from within a guest is fragile, in case of VCPU overload for example). > --- > target/i386/cpu.c | 13 +++++++++++++ > target/i386/cpu.h | 2 +- > 2 files changed, 14 insertions(+), 1 deletion(-) > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index ec3b50bf6e..cb6b5467d0 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -5931,6 +5931,19 @@ static void x86_cpu_reset(DeviceState *dev) > env->xstate_bv = 0; > > env->pat = 0x0007040600070406ULL; > + > + if (kvm_enabled()) { > + /* > + * KVM handles TSC = 0 specially and thinks we are hot-plugging > + * a new CPU, use 1 instead to force a reset. > + */ > + if (env->tsc != 0) { > + env->tsc = 1; > + } > + } else { > + env->tsc = 0; > + } > + > env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT; > if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) { > env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_MWAIT; > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index e31e6bd8b8..982c532353 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -1554,7 +1554,6 @@ typedef struct CPUArchState { > target_ulong kernelgsbase; > #endif > > - uint64_t tsc; > uint64_t tsc_adjust; > uint64_t tsc_deadline; > uint64_t tsc_aux; > @@ -1708,6 +1707,7 @@ typedef struct CPUArchState { > int64_t tsc_khz; > int64_t user_tsc_khz; /* for sanity check only */ > uint64_t apic_bus_freq; > + uint64_t tsc; > #if defined(CONFIG_KVM) || defined(CONFIG_HVF) > void *xsave_buf; > uint32_t xsave_buf_len; > -- > 2.35.1 > > >
As discussed with Marcelo offlist, KVM notices multiple CPUs being set to the same value within a small amount of time and keeps them synchronized. This is the same code that handles TSC synchronization on the destination side of migration. Paolo Il 9 maggio 2022 16:54:00 CEST, Marcelo Tosatti <mtosatti@redhat.com> ha scritto: >On Thu, Mar 24, 2022 at 06:31:36PM +0100, Paolo Bonzini wrote: >> Some versions of Windows hang on reboot if their TSC value is greater >> than 2^54. The calibration of the Hyper-V reference time overflows >> and fails; as a result the processors' clock sources are out of sync. >> >> The issue is that the TSC _should_ be reset to 0 on CPU reset and >> QEMU tries to do that. However, KVM special cases writing 0 to the >> TSC and thinks that QEMU is trying to hot-plug a CPU, which is >> correct the first time through but not later. Thwart this valiant >> effort and reset the TSC to 1 instead, but only if the CPU has been >> run once. >> >> For this to work, env->tsc has to be moved to the part of CPUArchState >> that is not zeroed at the beginning of x86_cpu_reset. >> >> Reported-by: Vadim Rozenfeld <vrozenfe@redhat.com> >> Supersedes: <20220324082346.72180-1-pbonzini@redhat.com> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > >Paolo, > >Won't this disable the logic to sync TSCs, making it possible >for TSC of SMP guests to go out of sync? (And remember the logic >to sync TSCs from within a guest is fragile, in case of VCPU overload >for example). > >> --- >> target/i386/cpu.c | 13 +++++++++++++ >> target/i386/cpu.h | 2 +- >> 2 files changed, 14 insertions(+), 1 deletion(-) >> >> diff --git a/target/i386/cpu.c b/target/i386/cpu.c >> index ec3b50bf6e..cb6b5467d0 100644 >> --- a/target/i386/cpu.c >> +++ b/target/i386/cpu.c >> @@ -5931,6 +5931,19 @@ static void x86_cpu_reset(DeviceState *dev) >> env->xstate_bv = 0; >> >> env->pat = 0x0007040600070406ULL; >> + >> + if (kvm_enabled()) { >> + /* >> + * KVM handles TSC = 0 specially and thinks we are hot-plugging >> + * a new CPU, use 1 instead to force a reset. >> + */ >> + if (env->tsc != 0) { >> + env->tsc = 1; >> + } >> + } else { >> + env->tsc = 0; >> + } >> + >> env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT; >> if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) { >> env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_MWAIT; >> diff --git a/target/i386/cpu.h b/target/i386/cpu.h >> index e31e6bd8b8..982c532353 100644 >> --- a/target/i386/cpu.h >> +++ b/target/i386/cpu.h >> @@ -1554,7 +1554,6 @@ typedef struct CPUArchState { >> target_ulong kernelgsbase; >> #endif >> >> - uint64_t tsc; >> uint64_t tsc_adjust; >> uint64_t tsc_deadline; >> uint64_t tsc_aux; >> @@ -1708,6 +1707,7 @@ typedef struct CPUArchState { >> int64_t tsc_khz; >> int64_t user_tsc_khz; /* for sanity check only */ >> uint64_t apic_bus_freq; >> + uint64_t tsc; >> #if defined(CONFIG_KVM) || defined(CONFIG_HVF) >> void *xsave_buf; >> uint32_t xsave_buf_len; >> -- >> 2.35.1 >> >> >> >
© 2016 - 2024 Red Hat, Inc.