:p
atchew
Login
The first patch aims at addressing a recent bug report, but may also point out further more or less related issues. The other patches do some tidying found desirable while doing the investigation. 1: time: deal with negative deltas in get_s_time_fixed() 2: 3: 4: 5: Jan
The first patch aims at addressing a recent bug report, but may also point out further more or less related issues. The other patches do some tidying found desirable while doing the investigation. 1: time: deal with negative deltas in get_s_time_fixed() 2: time: scale_delta() can be static 3: HVM: drop at_tsc parameter from ->set_tsc_offset() hook 4: time: gtsc_to_gtime() is HVM-only 5: time: pv_soft_rdtsc() is PV-only Jan
Callers may pass in TSC values from before the local TSC stamp was last updated (this would in particular be the case when the TSC was latched, a time rendezvous occurs, and the latched value is used only afterwards). scale_delta(), otoh, deals with unsigned values, and hence would treat negative incoming deltas as huge positive values, yielding entirely bogus return values. Fixes: 88e64cb785c1 ("x86/HVM: use fixed TSC value when saving or restoring domain") Reported-by: Антон Марков <akmarkov45@gmail.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> --- An alternative might be to have scale_delta() deal with signed deltas, yet that seemed more risky to me. There could likely be more Fixes: tags; the one used is the oldest applicable one, from what I can tell. A similar issue looks to exist in read_xen_timer() and its read_cycle() helper, if we're scheduled out (and beck in) between reading of the TSC and calculation of the delta (involving ->tsc_timestamp). Am I overlooking anything there? stime2tsc() guards against negative deltas by using 0 instead; I'm not quite sure that's correct either. amd_check_erratum_1474() (next to its call to tsc_ticks2ns()) has a comment towards the TSC being "sane", but is that correct? Due to TSC_ADJUST, rdtsc() may well return a huge value (and the TSC would then wrap through 0 at some point). Shouldn't we subtract boot_tsc_stamp before calling tsc_ticks2ns()? A similar issue looks to exist in tsc_get_info(), again when rdtsc() possibly returns a huge value due to TSC_ADJUST. Once again I wonder whether we shouldn't subtract boot_tsc_stamp. --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -XXX,XX +XXX,XX @@ s_time_t get_s_time_fixed(u64 at_tsc) tsc = at_tsc; else tsc = rdtsc_ordered(); - delta = tsc - t->stamp.local_tsc; - return t->stamp.local_stime + scale_delta(delta, &t->tsc_scale); + + if ( tsc >= t->stamp.local_tsc ) + delta = scale_delta(tsc - t->stamp.local_tsc, &t->tsc_scale); + else + delta = -scale_delta(t->stamp.local_tsc - tsc, &t->tsc_scale); + + return t->stamp.local_stime + delta; } s_time_t get_s_time(void)
It's used in time.c alone. Modernize types while there. Amends: 5a82d598d2d ("viridian: unify time sources") Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/include/asm/time.h +++ b/xen/arch/x86/include/asm/time.h @@ -XXX,XX +XXX,XX @@ u64 stime2tsc(s_time_t stime); struct time_scale; void set_time_scale(struct time_scale *ts, u64 ticks_per_sec); -u64 scale_delta(u64 delta, const struct time_scale *scale); /* Programmable Interval Timer (8254) */ --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -XXX,XX +XXX,XX @@ static inline u32 mul_frac(u32 multiplic * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. */ -u64 scale_delta(u64 delta, const struct time_scale *scale) +static uint64_t scale_delta(uint64_t delta, const struct time_scale *scale) { - u64 product; + uint64_t product; if ( scale->shift < 0 ) delta >>= -scale->shift;
While the VMX hook never used the parameter, the SVM one lost its sole use some time ago (while the original use of the parameter had gone away even earlier). Again modernize types while there. Amends: 0cd50753eb40 ("nestedsvm: Disable TscRateMSR") Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/hvm/domain.c +++ b/xen/arch/x86/hvm/domain.c @@ -XXX,XX +XXX,XX @@ int arch_set_info_hvm_guest(struct vcpu /* Sync AP's TSC with BSP's. */ v->arch.hvm.cache_tsc_offset = d->vcpu[0]->arch.hvm.cache_tsc_offset; - hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset, - d->arch.hvm.sync_tsc); + hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset); paging_update_paging_modes(v); --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -XXX,XX +XXX,XX @@ static void hvm_set_guest_tsc_fixed(stru delta_tsc = guest_tsc - tsc; v->arch.hvm.cache_tsc_offset = delta_tsc; - hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset, at_tsc); + hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset); } #define hvm_set_guest_tsc(v, t) hvm_set_guest_tsc_fixed(v, t, 0) @@ -XXX,XX +XXX,XX @@ static void hvm_set_guest_tsc_msr(struct static void hvm_set_guest_tsc_adjust(struct vcpu *v, u64 tsc_adjust) { v->arch.hvm.cache_tsc_offset += tsc_adjust - v->arch.hvm.msr_tsc_adjust; - hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset, 0); + hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset); v->arch.hvm.msr_tsc_adjust = tsc_adjust; if ( v == current ) update_vcpu_system_time(v); @@ -XXX,XX +XXX,XX @@ void hvm_vcpu_reset_state(struct vcpu *v /* Sync AP's TSC with BSP's. */ v->arch.hvm.cache_tsc_offset = v->domain->vcpu[0]->arch.hvm.cache_tsc_offset; - hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset, - d->arch.hvm.sync_tsc); + hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset); v->arch.hvm.msr_tsc_adjust = 0; --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -XXX,XX +XXX,XX @@ static int cf_check svm_get_guest_pat(st return 1; } -static void cf_check svm_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc) +static void cf_check svm_set_tsc_offset(struct vcpu *v, uint64_t offset) { struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb; struct vmcb_struct *n1vmcb, *n2vmcb; --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -XXX,XX +XXX,XX @@ static void cf_check vmx_handle_cd(struc } } -static void cf_check vmx_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc) +static void cf_check vmx_set_tsc_offset(struct vcpu *v, uint64_t offset) { vmx_vmcs_enter(v); --- a/xen/arch/x86/hvm/vmx/vvmx.c +++ b/xen/arch/x86/hvm/vmx/vvmx.c @@ -XXX,XX +XXX,XX @@ static void load_shadow_guest_state(stru hvm_inject_hw_exception(X86_EXC_GP, 0); } - hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset, 0); + hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset); vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmentry_fields), vmentry_fields); @@ -XXX,XX +XXX,XX @@ static void load_vvmcs_host_state(struct hvm_inject_hw_exception(X86_EXC_GP, 0); } - hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset, 0); + hvm_set_tsc_offset(v, v->arch.hvm.cache_tsc_offset); set_vvmcs(v, VM_ENTRY_INTR_INFO, 0); --- a/xen/arch/x86/include/asm/hvm/hvm.h +++ b/xen/arch/x86/include/asm/hvm/hvm.h @@ -XXX,XX +XXX,XX @@ struct hvm_function_table { int (*get_guest_pat)(struct vcpu *v, uint64_t *gpat); int (*set_guest_pat)(struct vcpu *v, uint64_t gpat); - void (*set_tsc_offset)(struct vcpu *v, u64 offset, u64 at_tsc); + void (*set_tsc_offset)(struct vcpu *v, uint64_t offset); void (*inject_event)(const struct x86_event *event); @@ -XXX,XX +XXX,XX @@ static inline void hvm_cpuid_policy_chan alternative_vcall(hvm_funcs.cpuid_policy_changed, v); } -static inline void hvm_set_tsc_offset(struct vcpu *v, uint64_t offset, - uint64_t at_tsc) +static inline void hvm_set_tsc_offset(struct vcpu *v, uint64_t offset) { - alternative_vcall(hvm_funcs.set_tsc_offset, v, offset, at_tsc); + alternative_vcall(hvm_funcs.set_tsc_offset, v, offset); } /* @@ -XXX,XX +XXX,XX @@ static inline void hvm_sync_pir_to_irr(s */ int hvm_guest_x86_mode(struct vcpu *v); void hvm_cpuid_policy_changed(struct vcpu *v); -void hvm_set_tsc_offset(struct vcpu *v, uint64_t offset, uint64_t at_tsc); +void hvm_set_tsc_offset(struct vcpu *v, uint64_t offset); /* End of prototype list */ --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -XXX,XX +XXX,XX @@ int tsc_set_info(struct domain *d, */ d->arch.hvm.sync_tsc = rdtsc(); hvm_set_tsc_offset(d->vcpu[0], - d->vcpu[0]->arch.hvm.cache_tsc_offset, - d->arch.hvm.sync_tsc); + d->vcpu[0]->arch.hvm.cache_tsc_offset); } }
Omit the function when HVM=n. With that the !HVM logic can also go away; leave an assertion. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -XXX,XX +XXX,XX @@ uint64_t gtime_to_gtsc(const struct doma return scale_delta(time, &d->arch.ns_to_vtsc); } +#ifdef CONFIG_HVM uint64_t gtsc_to_gtime(const struct domain *d, uint64_t tsc) { - u64 time = scale_delta(tsc, &d->arch.vtsc_to_ns); - - if ( !is_hvm_domain(d) ) - time += d->arch.vtsc_offset; - return time; + ASSERT(is_hvm_domain(d)); + return scale_delta(tsc, &d->arch.vtsc_to_ns); } +#endif /* CONFIG_HVM */ uint64_t pv_soft_rdtsc(const struct vcpu *v, const struct cpu_user_regs *regs) {
Omit the function when PV=n, by moving it to the sole file using it, thus allowing it to become static as well. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/include/asm/time.h +++ b/xen/arch/x86/include/asm/time.h @@ -XXX,XX +XXX,XX @@ uint64_t cf_check acpi_pm_tick_to_ns(uin uint64_t tsc_ticks2ns(uint64_t ticks); -struct cpu_user_regs; -uint64_t pv_soft_rdtsc(const struct vcpu *v, const struct cpu_user_regs *regs); uint64_t gtime_to_gtsc(const struct domain *d, uint64_t time); uint64_t gtsc_to_gtime(const struct domain *d, uint64_t tsc); --- a/xen/arch/x86/pv/emul-priv-op.c +++ b/xen/arch/x86/pv/emul-priv-op.c @@ -XXX,XX +XXX,XX @@ static uint64_t guest_efer(const struct return val; } +static uint64_t soft_rdtsc( + const struct vcpu *v, const struct cpu_user_regs *regs) +{ + s_time_t old, new, now = get_s_time(); + struct domain *d = v->domain; + + do { + old = d->arch.vtsc_last; + new = now > d->arch.vtsc_last ? now : old + 1; + } while ( cmpxchg(&d->arch.vtsc_last, old, new) != old ); + + return gtime_to_gtsc(d, new); +} + static int cf_check read_msr( unsigned int reg, uint64_t *val, struct x86_emulate_ctxt *ctxt) { @@ -XXX,XX +XXX,XX @@ static int cf_check read_msr( return X86EMUL_OKAY; case MSR_IA32_TSC: - *val = currd->arch.vtsc ? pv_soft_rdtsc(curr, ctxt->regs) : rdtsc(); + *val = currd->arch.vtsc ? soft_rdtsc(curr, ctxt->regs) : rdtsc(); return X86EMUL_OKAY; case MSR_EFER: --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -XXX,XX +XXX,XX @@ uint64_t gtsc_to_gtime(const struct doma } #endif /* CONFIG_HVM */ -uint64_t pv_soft_rdtsc(const struct vcpu *v, const struct cpu_user_regs *regs) -{ - s_time_t old, new, now = get_s_time(); - struct domain *d = v->domain; - - do { - old = d->arch.vtsc_last; - new = now > d->arch.vtsc_last ? now : old + 1; - } while ( cmpxchg(&d->arch.vtsc_last, old, new) != old ); - - return gtime_to_gtsc(d, new); -} - bool clocksource_is_tsc(void) { return plt_src.read_counter == READ_TSC_POISON;