hw/i386/kvm/clock.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-)
Update the system_time_msr address every time before reading
the value of tsc_timestamp from the guest's kvmclock page.
This must be done unconditionally because there is a situation in which
system_time_msr has been set by KVM but QEMU isn't aware of it.
This leads to updates of kvmclock_offset that do not respect the guest's
kvmclock values.
The situation appears when an L2 Linux guest runs on top of an L1 Linux guest
and the action that triggers the system_time_msr update is TPR access reporting.
Some L1 Linux guests turn off TPR access processing, so when L0
gets an L2 exit induced by a TPR MSR access it doesn't enter L1 and
processes the exit by itself.
Thus, L1 KVM doesn't know that the TPR access happened and doesn't
exit to QEMU, which in turn doesn't set the system_time_msr address.
This patch fixes this by making sure it knows the correct address every
time it is needed.
Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
---
hw/i386/kvm/clock.c | 32 +++++++++++++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)
diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index e713162..035196a 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -48,11 +48,38 @@ struct pvclock_vcpu_time_info {
uint8_t pad[2];
} __attribute__((__packed__)); /* 32 bytes */
+/* Refresh env->system_time_msr of every vCPU from the value KVM holds. */
+static void update_all_system_time_msr(void)
+{
+    CPUState *cpu;
+    CPUX86State *env;
+    struct {
+        struct kvm_msrs info;
+        struct kvm_msr_entry entries[1];
+    } msr_data;
+    int ret;
+
+    msr_data.info.nmsrs = 1;
+    msr_data.entries[0].index = MSR_KVM_SYSTEM_TIME;
+    CPU_FOREACH(cpu) {
+        /* NOTE(review): confirm this ioctl is safe outside the vCPU thread. */
+        ret = kvm_vcpu_ioctl(cpu, KVM_GET_MSRS, &msr_data);
+        if (ret < 0) {
+            /* ret is a negative errno; strerror() needs it positive. */
+            fprintf(stderr, "KVM_GET_MSRS failed: %s\n", strerror(-ret));
+            abort();
+        }
+        assert(ret == 1); /* one MSR requested, one must be read back */
+        env = cpu->env_ptr;
+        env->system_time_msr = msr_data.entries[0].data;
+    }
+}
+
static uint64_t kvmclock_current_nsec(KVMClockState *s)
{
CPUState *cpu = first_cpu;
CPUX86State *env = cpu->env_ptr;
- hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL;
+ hwaddr kvmclock_struct_pa;
uint64_t migration_tsc = env->tsc;
struct pvclock_vcpu_time_info time;
uint64_t delta;
@@ -60,6 +87,9 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s)
uint64_t nsec_hi;
uint64_t nsec;
+ update_all_system_time_msr();
+ kvmclock_struct_pa = env->system_time_msr & ~1ULL;
+
if (!(env->system_time_msr & 1ULL)) {
/* KVM clock not active */
return 0;
--
2.7.4
On 05/24/2017 05:07 PM, Denis Plotnikov wrote: > Do an update of system_time_msr address every time before reading > the value of tsc_timestamp from guest's kvmclock page. > > It should be done in a forcible manner because there is a situation > when system_time_msr has been set by kvm but qemu doesn't aware of it. > This leads to updates of kvmclock_offset without respect of guest's > kvmclock values. > > The situation appears when L2 linux guest runs over L1 linux guest and > the action inducing system_time_msr update is tpr access reporting. > Some L1 linux guests turn off processing TPR access and when L0 > gets an L2 exit induced by TPR MSR access it doesn't enter L1 and > processed it by itself. > Thus, L1 kvm doesn't know about that TPR access happening and doesn't > exit to qemu which in turn doesn't set system_time_msr address. > > This patch fixes this by making sure it knows the correct address every > time it is needed. > > Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> > --- > hw/i386/kvm/clock.c | 32 +++++++++++++++++++++++++++++++- > 1 file changed, 31 insertions(+), 1 deletion(-) > > diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c > index e713162..035196a 100644 > --- a/hw/i386/kvm/clock.c > +++ b/hw/i386/kvm/clock.c > @@ -48,11 +48,38 @@ struct pvclock_vcpu_time_info { > uint8_t pad[2]; > } __attribute__((__packed__)); /* 32 bytes */ > > +static void update_all_system_time_msr(void) > +{ > + CPUState *cpu; > + CPUX86State *env; > + struct { > + struct kvm_msrs info; > + struct kvm_msr_entry entries[1]; > + } msr_data; > + int ret; > + > + msr_data.info.nmsrs = 1; > + msr_data.entries[0].index = MSR_KVM_SYSTEM_TIME; > + > + CPU_FOREACH(cpu) { > + ret = kvm_vcpu_ioctl(cpu, KVM_GET_MSRS, &msr_data); > + > + if (ret < 0) { > + fprintf(stderr, "KVM_GET_MSRS failed: %s\n", strerror(ret)); > + abort(); > + } > + > + assert(ret == 1); > + env = cpu->env_ptr; > + env->system_time_msr = msr_data.entries[0].data; > + } > +} > + > static 
uint64_t kvmclock_current_nsec(KVMClockState *s) > { > CPUState *cpu = first_cpu; > CPUX86State *env = cpu->env_ptr; > - hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL; > + hwaddr kvmclock_struct_pa; > uint64_t migration_tsc = env->tsc; > struct pvclock_vcpu_time_info time; > uint64_t delta; > @@ -60,6 +87,9 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s) > uint64_t nsec_hi; > uint64_t nsec; > > + update_all_system_time_msr(); > + kvmclock_struct_pa = env->system_time_msr & ~1ULL; > + should we do this once/per guest boot? Den > if (!(env->system_time_msr & 1ULL)) { > /* KVM clock not active */ > return 0;
On 24.05.2017 17:09, Denis V. Lunev wrote: > On 05/24/2017 05:07 PM, Denis Plotnikov wrote: >> Do an update of system_time_msr address every time before reading >> the value of tsc_timestamp from guest's kvmclock page. >> >> It should be done in a forcible manner because there is a situation >> when system_time_msr has been set by kvm but qemu doesn't aware of it. >> This leads to updates of kvmclock_offset without respect of guest's >> kvmclock values. >> >> The situation appears when L2 linux guest runs over L1 linux guest and >> the action inducing system_time_msr update is tpr access reporting. >> Some L1 linux guests turn off processing TPR access and when L0 >> gets an L2 exit induced by TPR MSR access it doesn't enter L1 and >> processed it by itself. >> Thus, L1 kvm doesn't know about that TPR access happening and doesn't >> exit to qemu which in turn doesn't set system_time_msr address. >> >> This patch fixes this by making sure it knows the correct address every >> time it is needed. 
>> >> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> >> --- >> hw/i386/kvm/clock.c | 32 +++++++++++++++++++++++++++++++- >> 1 file changed, 31 insertions(+), 1 deletion(-) >> >> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c >> index e713162..035196a 100644 >> --- a/hw/i386/kvm/clock.c >> +++ b/hw/i386/kvm/clock.c >> @@ -48,11 +48,38 @@ struct pvclock_vcpu_time_info { >> uint8_t pad[2]; >> } __attribute__((__packed__)); /* 32 bytes */ >> >> +static void update_all_system_time_msr(void) >> +{ >> + CPUState *cpu; >> + CPUX86State *env; >> + struct { >> + struct kvm_msrs info; >> + struct kvm_msr_entry entries[1]; >> + } msr_data; >> + int ret; >> + >> + msr_data.info.nmsrs = 1; >> + msr_data.entries[0].index = MSR_KVM_SYSTEM_TIME; >> + >> + CPU_FOREACH(cpu) { >> + ret = kvm_vcpu_ioctl(cpu, KVM_GET_MSRS, &msr_data); >> + >> + if (ret < 0) { >> + fprintf(stderr, "KVM_GET_MSRS failed: %s\n", strerror(ret)); >> + abort(); >> + } >> + >> + assert(ret == 1); >> + env = cpu->env_ptr; >> + env->system_time_msr = msr_data.entries[0].data; >> + } >> +} >> + >> static uint64_t kvmclock_current_nsec(KVMClockState *s) >> { >> CPUState *cpu = first_cpu; >> CPUX86State *env = cpu->env_ptr; >> - hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL; >> + hwaddr kvmclock_struct_pa; >> uint64_t migration_tsc = env->tsc; >> struct pvclock_vcpu_time_info time; >> uint64_t delta; >> @@ -60,6 +87,9 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s) >> uint64_t nsec_hi; >> uint64_t nsec; >> >> + update_all_system_time_msr(); >> + kvmclock_struct_pa = env->system_time_msr & ~1ULL; >> + > should we do this once/per guest boot? practically - yes. I can barely imagine that the pv_clock page address may be changed after being set once. But we don't know the exact moment when the guest is going to write it. 
So as not to depend on any other event, I decided to check it every time before use; this won't cause any performance issues because the invocation happens only on VM state changes. > > Den >> if (!(env->system_time_msr & 1ULL)) { >> /* KVM clock not active */ >> return 0; > -- Best, Denis
On Wed, May 24, 2017 at 05:07:24PM +0300, Denis Plotnikov wrote: > Do an update of system_time_msr address every time before reading > the value of tsc_timestamp from guest's kvmclock page. > > It should be done in a forcible manner because there is a situation > when system_time_msr has been set by kvm but qemu doesn't aware of it. > This leads to updates of kvmclock_offset without respect of guest's > kvmclock values. > > The situation appears when L2 linux guest runs over L1 linux guest and > the action inducing system_time_msr update is tpr access reporting. > Some L1 linux guests turn off processing TPR access and when L0 > gets an L2 exit induced by TPR MSR access it doesn't enter L1 and > processed it by itself. > Thus, L1 kvm doesn't know about that TPR access happening and doesn't > exit to qemu which in turn doesn't set system_time_msr address. > > This patch fixes this by making sure it knows the correct address every > time it is needed. > > Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> > --- > hw/i386/kvm/clock.c | 32 +++++++++++++++++++++++++++++++- > 1 file changed, 31 insertions(+), 1 deletion(-) > > diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c > index e713162..035196a 100644 > --- a/hw/i386/kvm/clock.c > +++ b/hw/i386/kvm/clock.c > @@ -48,11 +48,38 @@ struct pvclock_vcpu_time_info { > uint8_t pad[2]; > } __attribute__((__packed__)); /* 32 bytes */ > > +static void update_all_system_time_msr(void) > +{ > + CPUState *cpu; > + CPUX86State *env; > + struct { > + struct kvm_msrs info; > + struct kvm_msr_entry entries[1]; > + } msr_data; > + int ret; > + > + msr_data.info.nmsrs = 1; > + msr_data.entries[0].index = MSR_KVM_SYSTEM_TIME; > + > + CPU_FOREACH(cpu) { > + ret = kvm_vcpu_ioctl(cpu, KVM_GET_MSRS, &msr_data); > + > + if (ret < 0) { > + fprintf(stderr, "KVM_GET_MSRS failed: %s\n", strerror(ret)); > + abort(); > + } > + > + assert(ret == 1); > + env = cpu->env_ptr; > + env->system_time_msr = msr_data.entries[0].data; > + } > +} 
> + > static uint64_t kvmclock_current_nsec(KVMClockState *s) > { > CPUState *cpu = first_cpu; > CPUX86State *env = cpu->env_ptr; > - hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL; > + hwaddr kvmclock_struct_pa; > uint64_t migration_tsc = env->tsc; > struct pvclock_vcpu_time_info time; > uint64_t delta; > @@ -60,6 +87,9 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s) > uint64_t nsec_hi; > uint64_t nsec; > > + update_all_system_time_msr(); I'd rather just cpu_synchronize_state(cpu) here. > + kvmclock_struct_pa = env->system_time_msr & ~1ULL; > + > if (!(env->system_time_msr & 1ULL)) { > /* KVM clock not active */ > return 0; Roman.
On Wed, May 24, 2017 at 06:54:09PM +0300, Roman Kagan wrote: > On Wed, May 24, 2017 at 05:07:24PM +0300, Denis Plotnikov wrote: > > Do an update of system_time_msr address every time before reading > > the value of tsc_timestamp from guest's kvmclock page. > > > > It should be done in a forcible manner because there is a situation > > when system_time_msr has been set by kvm but qemu doesn't aware of it. > > This leads to updates of kvmclock_offset without respect of guest's > > kvmclock values. > > > > The situation appears when L2 linux guest runs over L1 linux guest and > > the action inducing system_time_msr update is tpr access reporting. > > Some L1 linux guests turn off processing TPR access and when L0 > > gets an L2 exit induced by TPR MSR access it doesn't enter L1 and > > processed it by itself. > > Thus, L1 kvm doesn't know about that TPR access happening and doesn't > > exit to qemu which in turn doesn't set system_time_msr address. > > > > This patch fixes this by making sure it knows the correct address every > > time it is needed. 
> > > > Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> > > --- > > hw/i386/kvm/clock.c | 32 +++++++++++++++++++++++++++++++- > > 1 file changed, 31 insertions(+), 1 deletion(-) > > > > diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c > > index e713162..035196a 100644 > > --- a/hw/i386/kvm/clock.c > > +++ b/hw/i386/kvm/clock.c > > @@ -48,11 +48,38 @@ struct pvclock_vcpu_time_info { > > uint8_t pad[2]; > > } __attribute__((__packed__)); /* 32 bytes */ > > > > +static void update_all_system_time_msr(void) > > +{ > > + CPUState *cpu; > > + CPUX86State *env; > > + struct { > > + struct kvm_msrs info; > > + struct kvm_msr_entry entries[1]; > > + } msr_data; > > + int ret; > > + > > + msr_data.info.nmsrs = 1; > > + msr_data.entries[0].index = MSR_KVM_SYSTEM_TIME; > > + > > + CPU_FOREACH(cpu) { > > + ret = kvm_vcpu_ioctl(cpu, KVM_GET_MSRS, &msr_data); > > + > > + if (ret < 0) { > > + fprintf(stderr, "KVM_GET_MSRS failed: %s\n", strerror(ret)); > > + abort(); > > + } > > + > > + assert(ret == 1); > > + env = cpu->env_ptr; > > + env->system_time_msr = msr_data.entries[0].data; > > + } > > +} > > + > > static uint64_t kvmclock_current_nsec(KVMClockState *s) > > { > > CPUState *cpu = first_cpu; > > CPUX86State *env = cpu->env_ptr; > > - hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL; > > + hwaddr kvmclock_struct_pa; > > uint64_t migration_tsc = env->tsc; > > struct pvclock_vcpu_time_info time; > > uint64_t delta; > > @@ -60,6 +87,9 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s) > > uint64_t nsec_hi; > > uint64_t nsec; > > > > + update_all_system_time_msr(); > > I'd rather just cpu_synchronize_state(cpu) here. > > > + kvmclock_struct_pa = env->system_time_msr & ~1ULL; > > + > > if (!(env->system_time_msr & 1ULL)) { > > /* KVM clock not active */ > > return 0; > > Roman. Can't you avoid that call to each CPU? (ie fix the synchronization of the system time address problem in some other way?)
On Wed, May 24, 2017 at 02:20:05PM -0300, Marcelo Tosatti wrote: > On Wed, May 24, 2017 at 06:54:09PM +0300, Roman Kagan wrote: > > On Wed, May 24, 2017 at 05:07:24PM +0300, Denis Plotnikov wrote: > > > Do an update of system_time_msr address every time before reading > > > the value of tsc_timestamp from guest's kvmclock page. > > > > > > It should be done in a forcible manner because there is a situation > > > when system_time_msr has been set by kvm but qemu doesn't aware of it. > > > This leads to updates of kvmclock_offset without respect of guest's > > > kvmclock values. > > > > > > The situation appears when L2 linux guest runs over L1 linux guest and > > > the action inducing system_time_msr update is tpr access reporting. > > > Some L1 linux guests turn off processing TPR access and when L0 > > > gets an L2 exit induced by TPR MSR access it doesn't enter L1 and > > > processed it by itself. > > > Thus, L1 kvm doesn't know about that TPR access happening and doesn't > > > exit to qemu which in turn doesn't set system_time_msr address. > > > > > > This patch fixes this by making sure it knows the correct address every > > > time it is needed. 
> > > > > > Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> > > > --- > > > hw/i386/kvm/clock.c | 32 +++++++++++++++++++++++++++++++- > > > 1 file changed, 31 insertions(+), 1 deletion(-) > > > > > > diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c > > > index e713162..035196a 100644 > > > --- a/hw/i386/kvm/clock.c > > > +++ b/hw/i386/kvm/clock.c > > > @@ -48,11 +48,38 @@ struct pvclock_vcpu_time_info { > > > uint8_t pad[2]; > > > } __attribute__((__packed__)); /* 32 bytes */ > > > > > > +static void update_all_system_time_msr(void) > > > +{ > > > + CPUState *cpu; > > > + CPUX86State *env; > > > + struct { > > > + struct kvm_msrs info; > > > + struct kvm_msr_entry entries[1]; > > > + } msr_data; > > > + int ret; > > > + > > > + msr_data.info.nmsrs = 1; > > > + msr_data.entries[0].index = MSR_KVM_SYSTEM_TIME; > > > + > > > + CPU_FOREACH(cpu) { > > > + ret = kvm_vcpu_ioctl(cpu, KVM_GET_MSRS, &msr_data); > > > + > > > + if (ret < 0) { > > > + fprintf(stderr, "KVM_GET_MSRS failed: %s\n", strerror(ret)); > > > + abort(); > > > + } > > > + > > > + assert(ret == 1); > > > + env = cpu->env_ptr; > > > + env->system_time_msr = msr_data.entries[0].data; > > > + } > > > +} > > > + > > > static uint64_t kvmclock_current_nsec(KVMClockState *s) > > > { > > > CPUState *cpu = first_cpu; > > > CPUX86State *env = cpu->env_ptr; > > > - hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL; > > > + hwaddr kvmclock_struct_pa; > > > uint64_t migration_tsc = env->tsc; > > > struct pvclock_vcpu_time_info time; > > > uint64_t delta; > > > @@ -60,6 +87,9 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s) > > > uint64_t nsec_hi; > > > uint64_t nsec; > > > > > > + update_all_system_time_msr(); > > > > I'd rather just cpu_synchronize_state(cpu) here. > > > > > + kvmclock_struct_pa = env->system_time_msr & ~1ULL; > > > + > > > if (!(env->system_time_msr & 1ULL)) { > > > /* KVM clock not active */ > > > return 0; > > > > Roman. > > Can't you avoid that call to each CPU? 
(ie fix the synchronization > of the system time address problem in some other way?) Sorry, what call do you mean? On one hand I suggested exactly to only call cpu_synchronize_state on the current (== first) cpu. On the other, cpu_synchronize_state is heavier than just fetching a single msr. Anyway kvmclock_current_nsec is only called in kvmclock_vm_state_change callback which is certainly not performance-critical, so IMO less new code here is better than more efficiency. Or maybe I misunderstand your reason to request that the synchronization problem is fixed in some other way? Roman.
On 25/05/2017 11:40, Roman Kagan wrote: >>>> + kvmclock_struct_pa = env->system_time_msr & ~1ULL; >>>> + >>>> if (!(env->system_time_msr & 1ULL)) { >>>> /* KVM clock not active */ >>>> return 0; >>> Roman. >> Can't you avoid that call to each CPU? (ie fix the synchronization >> of the system time address problem in some other way?) > Sorry, what call do you mean? On one hand I suggested exactly to only > call cpu_synchronize_state on the current (== first) cpu. On the other, > cpu_synchronize_state is heavier than just fetching a single msr. > > Anyway kvmclock_current_nsec is only called in kvmclock_vm_state_change > callback which is certainly not performance-critical, so IMO less new > code here is better than more efficiency. > > Or maybe I misunderstand your reason to request that the synchronization > problem is fixed in some other way? Denis's patch is problematic in that KVM_GET_MSRS should run in the VCPU thread (using run_on_cpu). cpu_synchronize_state() is heavier, but solves this problem nicely. Since it's not performance critical as you say, calling cpu_synchronize_state() from kvmclock_current_nsec() seems the best solution. Paolo
© 2016 - 2024 Red Hat, Inc.