[PATCH v6 1/3] KVM: x86: Advance guest TSC after deep suspend.

Suleiman Souhlal posted 3 patches 3 months ago
There is a newer version of this series
[PATCH v6 1/3] KVM: x86: Advance guest TSC after deep suspend.
Posted by Suleiman Souhlal 3 months ago
Advance guest TSC to current time after suspend when the host
TSCs went backwards.

This makes the behavior consistent between suspends where the host TSC
resets and suspends where it doesn't, such as suspend-to-idle. In the
former case, the guests' TSCs would previously be "frozen" due to KVM's
backwards TSC prevention, while in the latter case they would advance.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Suleiman Souhlal <suleiman@google.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 28 +++++++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 639d9bcee8424d..5c465bdd6d088a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1414,6 +1414,7 @@ struct kvm_arch {
 	u64 cur_tsc_offset;
 	u64 cur_tsc_generation;
 	int nr_vcpus_matched_tsc;
+	bool host_was_suspended;
 
 	u32 default_tsc_khz;
 	bool user_set_tsc;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a9d992d5652fa0..e66bab1a1f56e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5035,7 +5035,32 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	/* Apply any externally detected TSC adjustments (due to suspend) */
 	if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
-		adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
+		unsigned long flags;
+		struct kvm *kvm;
+		bool advance;
+		u64 kernel_ns, l1_tsc, offset, tsc_now;
+
+		kvm = vcpu->kvm;
+		advance = kvm_get_time_and_clockread(&kernel_ns, &tsc_now);
+		raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
+		/*
+		 * Advance the guest's TSC to current time instead of only
+		 * preventing it from going backwards, while making sure
+		 * all the vCPUs use the same offset.
+		 */
+		if (kvm->arch.host_was_suspended && advance) {
+			l1_tsc = nsec_to_cycles(vcpu,
+						kvm->arch.kvmclock_offset + kernel_ns);
+			offset = kvm_compute_l1_tsc_offset(vcpu, l1_tsc);
+			kvm->arch.cur_tsc_offset = offset;
+			kvm_vcpu_write_tsc_offset(vcpu, offset);
+		} else if (advance)
+			kvm_vcpu_write_tsc_offset(vcpu, kvm->arch.cur_tsc_offset);
+		} else {
+			adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
+		}
+		kvm->arch.host_was_suspended = false;
+		raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 		vcpu->arch.tsc_offset_adjustment = 0;
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 	}
@@ -12729,6 +12754,7 @@ int kvm_arch_enable_virtualization_cpu(void)
 				kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 			}
 
+			kvm->arch.host_was_suspended = true;
 			/*
 			 * We have to disable TSC offset matching.. if you were
 			 * booting a VM while issuing an S4 host suspend....
-- 
2.50.0.727.gbf7dc18ff4-goog
Re: [PATCH v6 1/3] KVM: x86: Advance guest TSC after deep suspend.
Posted by kernel test robot 2 months, 4 weeks ago
Hi Suleiman,

kernel test robot noticed the following build errors:

[auto build test ERROR on kvm/queue]
[also build test ERROR on kvm/next kvm/linux-next linus/master v6.16-rc5 next-20250709]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Suleiman-Souhlal/KVM-x86-Advance-guest-TSC-after-deep-suspend/20250709-150751
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link:    https://lore.kernel.org/r/20250709070450.473297-2-suleiman%40google.com
patch subject: [PATCH v6 1/3] KVM: x86: Advance guest TSC after deep suspend.
config: i386-buildonly-randconfig-002-20250710 (https://download.01.org/0day-ci/archive/20250710/202507100824.oV2rHgt9-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250710/202507100824.oV2rHgt9-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507100824.oV2rHgt9-lkp@intel.com/

All errors (new ones prefixed by >>):

   arch/x86/kvm/x86.c: In function 'kvm_arch_vcpu_load':
>> arch/x86/kvm/x86.c:5044:27: error: implicit declaration of function 'kvm_get_time_and_clockread'; did you mean 'kvm_get_monotonic_and_clockread'? [-Werror=implicit-function-declaration]
    5044 |                 advance = kvm_get_time_and_clockread(&kernel_ns, &tsc_now);
         |                           ^~~~~~~~~~~~~~~~~~~~~~~~~~
         |                           kvm_get_monotonic_and_clockread
>> arch/x86/kvm/x86.c:5062:17: error: 'kvm' undeclared (first use in this function)
    5062 |                 kvm->arch.host_was_suspended = false;
         |                 ^~~
   arch/x86/kvm/x86.c:5062:17: note: each undeclared identifier is reported only once for each function it appears in
   In file included from include/linux/bitops.h:7,
                    from include/linux/kernel.h:23,
                    from include/linux/cpumask.h:11,
                    from include/linux/alloc_tag.h:13,
                    from include/linux/percpu.h:5,
                    from include/linux/context_tracking_state.h:5,
                    from include/linux/hardirq.h:5,
                    from include/linux/kvm_host.h:7,
                    from arch/x86/kvm/x86.c:20:
>> arch/x86/kvm/x86.c:5063:71: error: 'flags' undeclared (first use in this function)
    5063 |                 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
         |                                                                       ^~~~~
   include/linux/typecheck.h:11:16: note: in definition of macro 'typecheck'
      11 |         typeof(x) __dummy2; \
         |                ^
   arch/x86/kvm/x86.c:5063:17: note: in expansion of macro 'raw_spin_unlock_irqrestore'
    5063 |                 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/typecheck.h:12:25: warning: comparison of distinct pointer types lacks a cast
      12 |         (void)(&__dummy == &__dummy2); \
         |                         ^~
   include/linux/spinlock.h:281:17: note: in expansion of macro 'typecheck'
     281 |                 typecheck(unsigned long, flags);                \
         |                 ^~~~~~~~~
   arch/x86/kvm/x86.c:5063:17: note: in expansion of macro 'raw_spin_unlock_irqrestore'
    5063 |                 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/x86/kvm/x86.c: At top level:
>> arch/x86/kvm/x86.c:5068:9: error: expected identifier or '(' before 'if'
    5068 |         if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
         |         ^~
>> include/linux/kvm_host.h:182:39: error: expected declaration specifiers or '...' before '(' token
     182 | #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
         |                                       ^
   include/linux/kvm_host.h:186:36: note: in expansion of macro 'KVM_ARCH_REQ_FLAGS'
     186 | #define KVM_ARCH_REQ(nr)           KVM_ARCH_REQ_FLAGS(nr, 0)
         |                                    ^~~~~~~~~~~~~~~~~~
   arch/x86/include/asm/kvm_host.h:94:41: note: in expansion of macro 'KVM_ARCH_REQ'
      94 | #define KVM_REQ_STEAL_UPDATE            KVM_ARCH_REQ(8)
         |                                         ^~~~~~~~~~~~
   arch/x86/kvm/x86.c:5096:26: note: in expansion of macro 'KVM_REQ_STEAL_UPDATE'
    5096 |         kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
         |                          ^~~~~~~~~~~~~~~~~~~~
>> arch/x86/kvm/x86.c:5096:48: error: unknown type name 'vcpu'
    5096 |         kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
         |                                                ^~~~
>> arch/x86/kvm/x86.c:5097:1: error: expected identifier or '(' before '}' token
    5097 | }
         | ^
   cc1: some warnings being treated as errors


vim +5044 arch/x86/kvm/x86.c

  4997	
  4998	void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  4999	{
  5000		struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
  5001	
  5002		vcpu->arch.l1tf_flush_l1d = true;
  5003	
  5004		if (vcpu->scheduled_out && pmu->version && pmu->event_count) {
  5005			pmu->need_cleanup = true;
  5006			kvm_make_request(KVM_REQ_PMU, vcpu);
  5007		}
  5008	
  5009		/* Address WBINVD may be executed by guest */
  5010		if (need_emulate_wbinvd(vcpu)) {
  5011			if (kvm_x86_call(has_wbinvd_exit)())
  5012				cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
  5013			else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
  5014				smp_call_function_single(vcpu->cpu,
  5015						wbinvd_ipi, NULL, 1);
  5016		}
  5017	
  5018		kvm_x86_call(vcpu_load)(vcpu, cpu);
  5019	
  5020		if (vcpu != per_cpu(last_vcpu, cpu)) {
  5021			/*
  5022			 * Flush the branch predictor when switching vCPUs on the same
  5023			 * physical CPU, as each vCPU needs its own branch prediction
  5024			 * domain.  No IBPB is needed when switching between L1 and L2
  5025			 * on the same vCPU unless IBRS is advertised to the vCPU; that
  5026			 * is handled on the nested VM-Exit path.
  5027			 */
  5028			if (static_branch_likely(&switch_vcpu_ibpb))
  5029				indirect_branch_prediction_barrier();
  5030			per_cpu(last_vcpu, cpu) = vcpu;
  5031		}
  5032	
  5033		/* Save host pkru register if supported */
  5034		vcpu->arch.host_pkru = read_pkru();
  5035	
  5036		/* Apply any externally detected TSC adjustments (due to suspend) */
  5037		if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
  5038			unsigned long flags;
  5039			struct kvm *kvm;
  5040			bool advance;
  5041			u64 kernel_ns, l1_tsc, offset, tsc_now;
  5042	
  5043			kvm = vcpu->kvm;
> 5044			advance = kvm_get_time_and_clockread(&kernel_ns, &tsc_now);
  5045			raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
  5046			/*
  5047			 * Advance the guest's TSC to current time instead of only
  5048			 * preventing it from going backwards, while making sure
  5049			 * all the vCPUs use the same offset.
  5050			 */
  5051			if (kvm->arch.host_was_suspended && advance) {
  5052				l1_tsc = nsec_to_cycles(vcpu,
  5053							kvm->arch.kvmclock_offset + kernel_ns);
  5054				offset = kvm_compute_l1_tsc_offset(vcpu, l1_tsc);
  5055				kvm->arch.cur_tsc_offset = offset;
  5056				kvm_vcpu_write_tsc_offset(vcpu, offset);
  5057			} else if (advance)
  5058				kvm_vcpu_write_tsc_offset(vcpu, kvm->arch.cur_tsc_offset);
  5059			} else {
  5060				adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
  5061			}
> 5062			kvm->arch.host_was_suspended = false;
> 5063			raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
  5064			vcpu->arch.tsc_offset_adjustment = 0;
  5065			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
  5066		}
  5067	
> 5068		if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
  5069			s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
  5070					rdtsc() - vcpu->arch.last_host_tsc;
  5071			if (tsc_delta < 0)
  5072				mark_tsc_unstable("KVM discovered backwards TSC");
  5073	
  5074			if (kvm_check_tsc_unstable()) {
  5075				u64 offset = kvm_compute_l1_tsc_offset(vcpu,
  5076							vcpu->arch.last_guest_tsc);
  5077				kvm_vcpu_write_tsc_offset(vcpu, offset);
  5078				if (!vcpu->arch.guest_tsc_protected)
  5079					vcpu->arch.tsc_catchup = 1;
  5080			}
  5081	
  5082			if (kvm_lapic_hv_timer_in_use(vcpu))
  5083				kvm_lapic_restart_hv_timer(vcpu);
  5084	
  5085			/*
  5086			 * On a host with synchronized TSC, there is no need to update
  5087			 * kvmclock on vcpu->cpu migration
  5088			 */
  5089			if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
  5090				kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
  5091			if (vcpu->cpu != cpu)
  5092				kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
  5093			vcpu->cpu = cpu;
  5094		}
  5095	
> 5096		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
> 5097	}
  5098	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH v6 1/3] KVM: x86: Advance guest TSC after deep suspend.
Posted by kernel test robot 2 months, 4 weeks ago
Hi Suleiman,

kernel test robot noticed the following build warnings:

[auto build test WARNING on kvm/queue]
[also build test WARNING on kvm/next kvm/linux-next linus/master v6.16-rc5 next-20250709]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Suleiman-Souhlal/KVM-x86-Advance-guest-TSC-after-deep-suspend/20250709-150751
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link:    https://lore.kernel.org/r/20250709070450.473297-2-suleiman%40google.com
patch subject: [PATCH v6 1/3] KVM: x86: Advance guest TSC after deep suspend.
config: i386-buildonly-randconfig-002-20250710 (https://download.01.org/0day-ci/archive/20250710/202507100515.ZQd2P9F8-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250710/202507100515.ZQd2P9F8-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507100515.ZQd2P9F8-lkp@intel.com/

All warnings (new ones prefixed by >>):

   arch/x86/kvm/x86.c: In function 'kvm_arch_vcpu_load':
   arch/x86/kvm/x86.c:5044:27: error: implicit declaration of function 'kvm_get_time_and_clockread'; did you mean 'kvm_get_monotonic_and_clockread'? [-Werror=implicit-function-declaration]
    5044 |                 advance = kvm_get_time_and_clockread(&kernel_ns, &tsc_now);
         |                           ^~~~~~~~~~~~~~~~~~~~~~~~~~
         |                           kvm_get_monotonic_and_clockread
   arch/x86/kvm/x86.c:5062:17: error: 'kvm' undeclared (first use in this function)
    5062 |                 kvm->arch.host_was_suspended = false;
         |                 ^~~
   arch/x86/kvm/x86.c:5062:17: note: each undeclared identifier is reported only once for each function it appears in
   In file included from include/linux/bitops.h:7,
                    from include/linux/kernel.h:23,
                    from include/linux/cpumask.h:11,
                    from include/linux/alloc_tag.h:13,
                    from include/linux/percpu.h:5,
                    from include/linux/context_tracking_state.h:5,
                    from include/linux/hardirq.h:5,
                    from include/linux/kvm_host.h:7,
                    from arch/x86/kvm/x86.c:20:
   arch/x86/kvm/x86.c:5063:71: error: 'flags' undeclared (first use in this function)
    5063 |                 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
         |                                                                       ^~~~~
   include/linux/typecheck.h:11:16: note: in definition of macro 'typecheck'
      11 |         typeof(x) __dummy2; \
         |                ^
   arch/x86/kvm/x86.c:5063:17: note: in expansion of macro 'raw_spin_unlock_irqrestore'
    5063 |                 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~
>> include/linux/typecheck.h:12:25: warning: comparison of distinct pointer types lacks a cast
      12 |         (void)(&__dummy == &__dummy2); \
         |                         ^~
   include/linux/spinlock.h:281:17: note: in expansion of macro 'typecheck'
     281 |                 typecheck(unsigned long, flags);                \
         |                 ^~~~~~~~~
   arch/x86/kvm/x86.c:5063:17: note: in expansion of macro 'raw_spin_unlock_irqrestore'
    5063 |                 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/x86/kvm/x86.c: At top level:
   arch/x86/kvm/x86.c:5068:9: error: expected identifier or '(' before 'if'
    5068 |         if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
         |         ^~
   include/linux/kvm_host.h:182:39: error: expected declaration specifiers or '...' before '(' token
     182 | #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
         |                                       ^
   include/linux/kvm_host.h:186:36: note: in expansion of macro 'KVM_ARCH_REQ_FLAGS'
     186 | #define KVM_ARCH_REQ(nr)           KVM_ARCH_REQ_FLAGS(nr, 0)
         |                                    ^~~~~~~~~~~~~~~~~~
   arch/x86/include/asm/kvm_host.h:94:41: note: in expansion of macro 'KVM_ARCH_REQ'
      94 | #define KVM_REQ_STEAL_UPDATE            KVM_ARCH_REQ(8)
         |                                         ^~~~~~~~~~~~
   arch/x86/kvm/x86.c:5096:26: note: in expansion of macro 'KVM_REQ_STEAL_UPDATE'
    5096 |         kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
         |                          ^~~~~~~~~~~~~~~~~~~~
   arch/x86/kvm/x86.c:5096:48: error: unknown type name 'vcpu'
    5096 |         kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
         |                                                ^~~~
   arch/x86/kvm/x86.c:5097:1: error: expected identifier or '(' before '}' token
    5097 | }
         | ^
   cc1: some warnings being treated as errors


vim +12 include/linux/typecheck.h

e0deaff470900a Andrew Morton 2008-07-25   4  
e0deaff470900a Andrew Morton 2008-07-25   5  /*
e0deaff470900a Andrew Morton 2008-07-25   6   * Check at compile time that something is of a particular type.
e0deaff470900a Andrew Morton 2008-07-25   7   * Always evaluates to 1 so you may use it easily in comparisons.
e0deaff470900a Andrew Morton 2008-07-25   8   */
e0deaff470900a Andrew Morton 2008-07-25   9  #define typecheck(type,x) \
e0deaff470900a Andrew Morton 2008-07-25  10  ({	type __dummy; \
e0deaff470900a Andrew Morton 2008-07-25  11  	typeof(x) __dummy2; \
e0deaff470900a Andrew Morton 2008-07-25 @12  	(void)(&__dummy == &__dummy2); \
e0deaff470900a Andrew Morton 2008-07-25  13  	1; \
e0deaff470900a Andrew Morton 2008-07-25  14  })
e0deaff470900a Andrew Morton 2008-07-25  15  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki