Advance guest TSC to current time after suspend when the host
TSCs went backwards.
This makes the behavior consistent between suspends where the host TSC
resets and suspends where it doesn't (such as suspend-to-idle).
Previously, in the former case the guests' TSCs would be "frozen" due
to KVM's backwards TSC prevention, while in the latter case they would
advance.
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Suleiman Souhlal <suleiman@google.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/x86.c | 28 +++++++++++++++++++++++++++-
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 639d9bcee8424d..5c465bdd6d088a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1414,6 +1414,7 @@ struct kvm_arch {
u64 cur_tsc_offset;
u64 cur_tsc_generation;
int nr_vcpus_matched_tsc;
+ bool host_was_suspended;
u32 default_tsc_khz;
bool user_set_tsc;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a9d992d5652fa0..e66bab1a1f56e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5035,7 +5035,32 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
/* Apply any externally detected TSC adjustments (due to suspend) */
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
- adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
+ unsigned long flags;
+ struct kvm *kvm;
+ bool advance;
+ u64 kernel_ns, l1_tsc, offset, tsc_now;
+
+ kvm = vcpu->kvm;
+ advance = kvm_get_time_and_clockread(&kernel_ns, &tsc_now);
+ raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
+ /*
+ * Advance the guest's TSC to current time instead of only
+ * preventing it from going backwards, while making sure
+ * all the vCPUs use the same offset.
+ */
+ if (kvm->arch.host_was_suspended && advance) {
+ l1_tsc = nsec_to_cycles(vcpu,
+ kvm->arch.kvmclock_offset + kernel_ns);
+ offset = kvm_compute_l1_tsc_offset(vcpu, l1_tsc);
+ kvm->arch.cur_tsc_offset = offset;
+ kvm_vcpu_write_tsc_offset(vcpu, offset);
+ } else if (advance)
+ kvm_vcpu_write_tsc_offset(vcpu, kvm->arch.cur_tsc_offset);
+ } else {
+ adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
+ }
+ kvm->arch.host_was_suspended = false;
+ raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
vcpu->arch.tsc_offset_adjustment = 0;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
}
@@ -12729,6 +12754,7 @@ int kvm_arch_enable_virtualization_cpu(void)
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
}
+ kvm->arch.host_was_suspended = true;
/*
* We have to disable TSC offset matching.. if you were
* booting a VM while issuing an S4 host suspend....
--
2.50.0.727.gbf7dc18ff4-goog
Hi Suleiman, kernel test robot noticed the following build errors: [auto build test ERROR on kvm/queue] [also build test ERROR on kvm/next kvm/linux-next linus/master v6.16-rc5 next-20250709] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Suleiman-Souhlal/KVM-x86-Advance-guest-TSC-after-deep-suspend/20250709-150751 base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue patch link: https://lore.kernel.org/r/20250709070450.473297-2-suleiman%40google.com patch subject: [PATCH v6 1/3] KVM: x86: Advance guest TSC after deep suspend. config: i386-buildonly-randconfig-002-20250710 (https://download.01.org/0day-ci/archive/20250710/202507100824.oV2rHgt9-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250710/202507100824.oV2rHgt9-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202507100824.oV2rHgt9-lkp@intel.com/ All errors (new ones prefixed by >>): arch/x86/kvm/x86.c: In function 'kvm_arch_vcpu_load': >> arch/x86/kvm/x86.c:5044:27: error: implicit declaration of function 'kvm_get_time_and_clockread'; did you mean 'kvm_get_monotonic_and_clockread'? 
[-Werror=implicit-function-declaration] 5044 | advance = kvm_get_time_and_clockread(&kernel_ns, &tsc_now); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ | kvm_get_monotonic_and_clockread >> arch/x86/kvm/x86.c:5062:17: error: 'kvm' undeclared (first use in this function) 5062 | kvm->arch.host_was_suspended = false; | ^~~ arch/x86/kvm/x86.c:5062:17: note: each undeclared identifier is reported only once for each function it appears in In file included from include/linux/bitops.h:7, from include/linux/kernel.h:23, from include/linux/cpumask.h:11, from include/linux/alloc_tag.h:13, from include/linux/percpu.h:5, from include/linux/context_tracking_state.h:5, from include/linux/hardirq.h:5, from include/linux/kvm_host.h:7, from arch/x86/kvm/x86.c:20: >> arch/x86/kvm/x86.c:5063:71: error: 'flags' undeclared (first use in this function) 5063 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ^~~~~ include/linux/typecheck.h:11:16: note: in definition of macro 'typecheck' 11 | typeof(x) __dummy2; \ | ^ arch/x86/kvm/x86.c:5063:17: note: in expansion of macro 'raw_spin_unlock_irqrestore' 5063 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/typecheck.h:12:25: warning: comparison of distinct pointer types lacks a cast 12 | (void)(&__dummy == &__dummy2); \ | ^~ include/linux/spinlock.h:281:17: note: in expansion of macro 'typecheck' 281 | typecheck(unsigned long, flags); \ | ^~~~~~~~~ arch/x86/kvm/x86.c:5063:17: note: in expansion of macro 'raw_spin_unlock_irqrestore' 5063 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/x86.c: At top level: >> arch/x86/kvm/x86.c:5068:9: error: expected identifier or '(' before 'if' 5068 | if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { | ^~ >> include/linux/kvm_host.h:182:39: error: expected declaration specifiers or '...' 
before '(' token 182 | #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ | ^ include/linux/kvm_host.h:186:36: note: in expansion of macro 'KVM_ARCH_REQ_FLAGS' 186 | #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) | ^~~~~~~~~~~~~~~~~~ arch/x86/include/asm/kvm_host.h:94:41: note: in expansion of macro 'KVM_ARCH_REQ' 94 | #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) | ^~~~~~~~~~~~ arch/x86/kvm/x86.c:5096:26: note: in expansion of macro 'KVM_REQ_STEAL_UPDATE' 5096 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | ^~~~~~~~~~~~~~~~~~~~ >> arch/x86/kvm/x86.c:5096:48: error: unknown type name 'vcpu' 5096 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | ^~~~ >> arch/x86/kvm/x86.c:5097:1: error: expected identifier or '(' before '}' token 5097 | } | ^ cc1: some warnings being treated as errors vim +5044 arch/x86/kvm/x86.c 4997 4998 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 4999 { 5000 struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); 5001 5002 vcpu->arch.l1tf_flush_l1d = true; 5003 5004 if (vcpu->scheduled_out && pmu->version && pmu->event_count) { 5005 pmu->need_cleanup = true; 5006 kvm_make_request(KVM_REQ_PMU, vcpu); 5007 } 5008 5009 /* Address WBINVD may be executed by guest */ 5010 if (need_emulate_wbinvd(vcpu)) { 5011 if (kvm_x86_call(has_wbinvd_exit)()) 5012 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); 5013 else if (vcpu->cpu != -1 && vcpu->cpu != cpu) 5014 smp_call_function_single(vcpu->cpu, 5015 wbinvd_ipi, NULL, 1); 5016 } 5017 5018 kvm_x86_call(vcpu_load)(vcpu, cpu); 5019 5020 if (vcpu != per_cpu(last_vcpu, cpu)) { 5021 /* 5022 * Flush the branch predictor when switching vCPUs on the same 5023 * physical CPU, as each vCPU needs its own branch prediction 5024 * domain. No IBPB is needed when switching between L1 and L2 5025 * on the same vCPU unless IBRS is advertised to the vCPU; that 5026 * is handled on the nested VM-Exit path. 
5027 */ 5028 if (static_branch_likely(&switch_vcpu_ibpb)) 5029 indirect_branch_prediction_barrier(); 5030 per_cpu(last_vcpu, cpu) = vcpu; 5031 } 5032 5033 /* Save host pkru register if supported */ 5034 vcpu->arch.host_pkru = read_pkru(); 5035 5036 /* Apply any externally detected TSC adjustments (due to suspend) */ 5037 if (unlikely(vcpu->arch.tsc_offset_adjustment)) { 5038 unsigned long flags; 5039 struct kvm *kvm; 5040 bool advance; 5041 u64 kernel_ns, l1_tsc, offset, tsc_now; 5042 5043 kvm = vcpu->kvm; > 5044 advance = kvm_get_time_and_clockread(&kernel_ns, &tsc_now); 5045 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 5046 /* 5047 * Advance the guest's TSC to current time instead of only 5048 * preventing it from going backwards, while making sure 5049 * all the vCPUs use the same offset. 5050 */ 5051 if (kvm->arch.host_was_suspended && advance) { 5052 l1_tsc = nsec_to_cycles(vcpu, 5053 kvm->arch.kvmclock_offset + kernel_ns); 5054 offset = kvm_compute_l1_tsc_offset(vcpu, l1_tsc); 5055 kvm->arch.cur_tsc_offset = offset; 5056 kvm_vcpu_write_tsc_offset(vcpu, offset); 5057 } else if (advance) 5058 kvm_vcpu_write_tsc_offset(vcpu, kvm->arch.cur_tsc_offset); 5059 } else { 5060 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); 5061 } > 5062 kvm->arch.host_was_suspended = false; > 5063 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); 5064 vcpu->arch.tsc_offset_adjustment = 0; 5065 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); 5066 } 5067 > 5068 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { 5069 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 
0 : 5070 rdtsc() - vcpu->arch.last_host_tsc; 5071 if (tsc_delta < 0) 5072 mark_tsc_unstable("KVM discovered backwards TSC"); 5073 5074 if (kvm_check_tsc_unstable()) { 5075 u64 offset = kvm_compute_l1_tsc_offset(vcpu, 5076 vcpu->arch.last_guest_tsc); 5077 kvm_vcpu_write_tsc_offset(vcpu, offset); 5078 if (!vcpu->arch.guest_tsc_protected) 5079 vcpu->arch.tsc_catchup = 1; 5080 } 5081 5082 if (kvm_lapic_hv_timer_in_use(vcpu)) 5083 kvm_lapic_restart_hv_timer(vcpu); 5084 5085 /* 5086 * On a host with synchronized TSC, there is no need to update 5087 * kvmclock on vcpu->cpu migration 5088 */ 5089 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) 5090 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); 5091 if (vcpu->cpu != cpu) 5092 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu); 5093 vcpu->cpu = cpu; 5094 } 5095 > 5096 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); > 5097 } 5098 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi Suleiman, kernel test robot noticed the following build warnings: [auto build test WARNING on kvm/queue] [also build test WARNING on kvm/next kvm/linux-next linus/master v6.16-rc5 next-20250709] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Suleiman-Souhlal/KVM-x86-Advance-guest-TSC-after-deep-suspend/20250709-150751 base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue patch link: https://lore.kernel.org/r/20250709070450.473297-2-suleiman%40google.com patch subject: [PATCH v6 1/3] KVM: x86: Advance guest TSC after deep suspend. config: i386-buildonly-randconfig-002-20250710 (https://download.01.org/0day-ci/archive/20250710/202507100515.ZQd2P9F8-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250710/202507100515.ZQd2P9F8-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202507100515.ZQd2P9F8-lkp@intel.com/ All warnings (new ones prefixed by >>): arch/x86/kvm/x86.c: In function 'kvm_arch_vcpu_load': arch/x86/kvm/x86.c:5044:27: error: implicit declaration of function 'kvm_get_time_and_clockread'; did you mean 'kvm_get_monotonic_and_clockread'? 
[-Werror=implicit-function-declaration] 5044 | advance = kvm_get_time_and_clockread(&kernel_ns, &tsc_now); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ | kvm_get_monotonic_and_clockread arch/x86/kvm/x86.c:5062:17: error: 'kvm' undeclared (first use in this function) 5062 | kvm->arch.host_was_suspended = false; | ^~~ arch/x86/kvm/x86.c:5062:17: note: each undeclared identifier is reported only once for each function it appears in In file included from include/linux/bitops.h:7, from include/linux/kernel.h:23, from include/linux/cpumask.h:11, from include/linux/alloc_tag.h:13, from include/linux/percpu.h:5, from include/linux/context_tracking_state.h:5, from include/linux/hardirq.h:5, from include/linux/kvm_host.h:7, from arch/x86/kvm/x86.c:20: arch/x86/kvm/x86.c:5063:71: error: 'flags' undeclared (first use in this function) 5063 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ^~~~~ include/linux/typecheck.h:11:16: note: in definition of macro 'typecheck' 11 | typeof(x) __dummy2; \ | ^ arch/x86/kvm/x86.c:5063:17: note: in expansion of macro 'raw_spin_unlock_irqrestore' 5063 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ >> include/linux/typecheck.h:12:25: warning: comparison of distinct pointer types lacks a cast 12 | (void)(&__dummy == &__dummy2); \ | ^~ include/linux/spinlock.h:281:17: note: in expansion of macro 'typecheck' 281 | typecheck(unsigned long, flags); \ | ^~~~~~~~~ arch/x86/kvm/x86.c:5063:17: note: in expansion of macro 'raw_spin_unlock_irqrestore' 5063 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/x86.c: At top level: arch/x86/kvm/x86.c:5068:9: error: expected identifier or '(' before 'if' 5068 | if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { | ^~ include/linux/kvm_host.h:182:39: error: expected declaration specifiers or '...' 
before '(' token 182 | #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ | ^ include/linux/kvm_host.h:186:36: note: in expansion of macro 'KVM_ARCH_REQ_FLAGS' 186 | #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) | ^~~~~~~~~~~~~~~~~~ arch/x86/include/asm/kvm_host.h:94:41: note: in expansion of macro 'KVM_ARCH_REQ' 94 | #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) | ^~~~~~~~~~~~ arch/x86/kvm/x86.c:5096:26: note: in expansion of macro 'KVM_REQ_STEAL_UPDATE' 5096 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | ^~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/x86.c:5096:48: error: unknown type name 'vcpu' 5096 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | ^~~~ arch/x86/kvm/x86.c:5097:1: error: expected identifier or '(' before '}' token 5097 | } | ^ cc1: some warnings being treated as errors vim +12 include/linux/typecheck.h e0deaff470900a Andrew Morton 2008-07-25 4 e0deaff470900a Andrew Morton 2008-07-25 5 /* e0deaff470900a Andrew Morton 2008-07-25 6 * Check at compile time that something is of a particular type. e0deaff470900a Andrew Morton 2008-07-25 7 * Always evaluates to 1 so you may use it easily in comparisons. e0deaff470900a Andrew Morton 2008-07-25 8 */ e0deaff470900a Andrew Morton 2008-07-25 9 #define typecheck(type,x) \ e0deaff470900a Andrew Morton 2008-07-25 10 ({ type __dummy; \ e0deaff470900a Andrew Morton 2008-07-25 11 typeof(x) __dummy2; \ e0deaff470900a Andrew Morton 2008-07-25 @12 (void)(&__dummy == &__dummy2); \ e0deaff470900a Andrew Morton 2008-07-25 13 1; \ e0deaff470900a Andrew Morton 2008-07-25 14 }) e0deaff470900a Andrew Morton 2008-07-25 15 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.