[Qemu-devel] [PATCH 3/4] cpus: protect TimerState writes with a spinlock

Paolo Bonzini posted 4 patches 7 years, 2 months ago
[Qemu-devel] [PATCH 3/4] cpus: protect TimerState writes with a spinlock
Posted by Paolo Bonzini 7 years, 2 months ago
In the next patch, we will need to write cpu_ticks_offset from any
thread, even outside the BQL.  Currently, it is protected by the BQL
just because cpu_enable_ticks and cpu_disable_ticks happen to hold it,
but the critical sections are well delimited and it's easy to remove
the BQL dependency.

Add a spinlock that matches vm_clock_seqlock, and hold it when writing
to the TimerState.  This also lets us fix cpu_update_icount when 64-bit
atomics are not available.

Fields of TimersState are reordered to avoid padding.
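
(For reference, a rough sketch of how the seqlock_write_lock()/seqlock_write_unlock()
helpers used below are expected to compose the two primitives; the names are taken
from the calls in the diff, not copied from QEMU's seqlock.h:)

    static inline void seqlock_write_lock(QemuSeqLock *sl, QemuSpin *spin)
    {
        /* the spinlock serializes writers against each other */
        qemu_spin_lock(spin);
        /* the sequence counter still lets lock-free readers detect the
         * in-progress update and retry */
        seqlock_write_begin(sl);
    }

    static inline void seqlock_write_unlock(QemuSeqLock *sl, QemuSpin *spin)
    {
        seqlock_write_end(sl);
        qemu_spin_unlock(spin);
    }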

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpus.c | 72 ++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 25 deletions(-)

diff --git a/cpus.c b/cpus.c
index 680706aefa..63ddd4fd21 100644
--- a/cpus.c
+++ b/cpus.c
@@ -129,21 +129,27 @@ typedef struct TimersState {
     int64_t cpu_ticks_prev;
     int64_t cpu_ticks_offset;
 
-    /* cpu_clock_offset can be read out of BQL, so protect it with
-     * this lock.
+    /* Protect fields that can be respectively read outside the
+     * BQL, and written from multiple threads.
      */
     QemuSeqLock vm_clock_seqlock;
-    int64_t cpu_clock_offset;
-    int32_t cpu_ticks_enabled;
+    QemuSpin vm_clock_lock;
+
+    int16_t cpu_ticks_enabled;
 
     /* Conversion factor from emulated instructions to virtual clock ticks.  */
-    int icount_time_shift;
+    int16_t icount_time_shift;
+
     /* Compensate for varying guest execution speed.  */
     int64_t qemu_icount_bias;
+
+    int64_t vm_clock_warp_start;
+    int64_t cpu_clock_offset;
+
     /* Only written by TCG thread */
     int64_t qemu_icount;
+
     /* for adjusting icount */
-    int64_t vm_clock_warp_start;
     QEMUTimer *icount_rt_timer;
     QEMUTimer *icount_vm_timer;
     QEMUTimer *icount_warp_timer;
@@ -244,11 +250,15 @@ void cpu_update_icount(CPUState *cpu)
     int64_t executed = cpu_get_icount_executed(cpu);
     cpu->icount_budget -= executed;
 
-#ifdef CONFIG_ATOMIC64
+#ifndef CONFIG_ATOMIC64
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
+#endif
     atomic_set__nocheck(&timers_state.qemu_icount,
                         timers_state.qemu_icount + executed);
-#else /* FIXME: we need 64bit atomics to do this safely */
-    timers_state.qemu_icount += executed;
+#ifndef CONFIG_ATOMIC64
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                         &timers_state.vm_clock_lock);
 #endif
 }
 
@@ -369,14 +379,15 @@ int64_t cpu_get_clock(void)
  */
 void cpu_enable_ticks(void)
 {
-    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
-    seqlock_write_begin(&timers_state.vm_clock_seqlock);
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
     if (!timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
         timers_state.cpu_clock_offset -= get_clock();
         timers_state.cpu_ticks_enabled = 1;
     }
-    seqlock_write_end(&timers_state.vm_clock_seqlock);
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
 }
 
 /* disable cpu_get_ticks() : the clock is stopped. You must not call
@@ -385,14 +396,15 @@ void cpu_enable_ticks(void)
  */
 void cpu_disable_ticks(void)
 {
-    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
-    seqlock_write_begin(&timers_state.vm_clock_seqlock);
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
     if (timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
         timers_state.cpu_clock_offset = cpu_get_clock_locked();
         timers_state.cpu_ticks_enabled = 0;
     }
-    seqlock_write_end(&timers_state.vm_clock_seqlock);
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                         &timers_state.vm_clock_lock);
 }
 
 /* Correlation between real and virtual time is always going to be
@@ -415,7 +427,8 @@ static void icount_adjust(void)
         return;
     }
 
-    seqlock_write_begin(&timers_state.vm_clock_seqlock);
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
     cur_time = cpu_get_clock_locked();
     cur_icount = cpu_get_icount_locked();
 
@@ -439,7 +452,8 @@ static void icount_adjust(void)
     atomic_set__nocheck(&timers_state.qemu_icount_bias,
                         cur_icount - (timers_state.qemu_icount
                                       << timers_state.icount_time_shift));
-    seqlock_write_end(&timers_state.vm_clock_seqlock);
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                         &timers_state.vm_clock_lock);
 }
 
 static void icount_adjust_rt(void *opaque)
@@ -480,7 +494,8 @@ static void icount_warp_rt(void)
         return;
     }
 
-    seqlock_write_begin(&timers_state.vm_clock_seqlock);
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
     if (runstate_is_running()) {
         int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                      cpu_get_clock_locked());
@@ -500,7 +515,8 @@ static void icount_warp_rt(void)
                             timers_state.qemu_icount_bias + warp_delta);
     }
     timers_state.vm_clock_warp_start = -1;
-    seqlock_write_end(&timers_state.vm_clock_seqlock);
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
 
     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
@@ -525,10 +541,12 @@ void qtest_clock_warp(int64_t dest)
         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
 
-        seqlock_write_begin(&timers_state.vm_clock_seqlock);
+        seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                           &timers_state.vm_clock_lock);
         atomic_set__nocheck(&timers_state.qemu_icount_bias,
                             timers_state.qemu_icount_bias + warp);
-        seqlock_write_end(&timers_state.vm_clock_seqlock);
+        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                             &timers_state.vm_clock_lock);
 
         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
@@ -595,10 +613,12 @@ void qemu_start_warp_timer(void)
              * It is useful when we want a deterministic execution time,
              * isolated from host latencies.
              */
-            seqlock_write_begin(&timers_state.vm_clock_seqlock);
+            seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                               &timers_state.vm_clock_lock);
             atomic_set__nocheck(&timers_state.qemu_icount_bias,
                                 timers_state.qemu_icount_bias + deadline);
-            seqlock_write_end(&timers_state.vm_clock_seqlock);
+            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                                 &timers_state.vm_clock_lock);
             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
         } else {
             /*
@@ -609,12 +629,14 @@ void qemu_start_warp_timer(void)
              * you will not be sending network packets continuously instead of
              * every 100ms.
              */
-            seqlock_write_begin(&timers_state.vm_clock_seqlock);
+            seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                               &timers_state.vm_clock_lock);
             if (timers_state.vm_clock_warp_start == -1
                 || timers_state.vm_clock_warp_start > clock) {
                 timers_state.vm_clock_warp_start = clock;
             }
-            seqlock_write_end(&timers_state.vm_clock_seqlock);
+            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                                 &timers_state.vm_clock_lock);
             timer_mod_anticipate(timers_state.icount_warp_timer,
                                  clock + deadline);
         }
-- 
2.17.1



Re: [Qemu-devel] [3/4] cpus: protect TimerState writes with a spinlock
Posted by Pavel Dovgalyuk 7 years, 2 months ago
Hi, Paolo!

Seems that this one breaks the record/replay.

> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> In the next patch, we will need to write cpu_ticks_offset from any
> thread, even outside the BQL.  Currently, it is protected by the BQL
> just because cpu_enable_ticks and cpu_disable_ticks happen to hold it,
> but the critical sections are well delimited and it's easy to remove
> the BQL dependency.
> 
> Add a spinlock that matches vm_clock_seqlock, and hold it when writing
> to the TimerState.  This also lets us fix cpu_update_icount when 64-bit
> atomics are not available.
> 
> Fields of TimersState are reordered to avoid padding.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  cpus.c | 72 ++++++++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 47 insertions(+), 25 deletions(-)
> 
> diff --git a/cpus.c b/cpus.c
> index 680706aefa..63ddd4fd21 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -129,21 +129,27 @@ typedef struct TimersState {
>      int64_t cpu_ticks_prev;
>      int64_t cpu_ticks_offset;
> 
> -    /* cpu_clock_offset can be read out of BQL, so protect it with
> -     * this lock.
> +    /* Protect fields that can be respectively read outside the
> +     * BQL, and written from multiple threads.
>       */
>      QemuSeqLock vm_clock_seqlock;
> -    int64_t cpu_clock_offset;
> -    int32_t cpu_ticks_enabled;
> +    QemuSpin vm_clock_lock;
> +
> +    int16_t cpu_ticks_enabled;
> 
>      /* Conversion factor from emulated instructions to virtual clock ticks.  */
> -    int icount_time_shift;
> +    int16_t icount_time_shift;
> +
>      /* Compensate for varying guest execution speed.  */
>      int64_t qemu_icount_bias;
> +
> +    int64_t vm_clock_warp_start;
> +    int64_t cpu_clock_offset;
> +
>      /* Only written by TCG thread */
>      int64_t qemu_icount;
> +
>      /* for adjusting icount */
> -    int64_t vm_clock_warp_start;
>      QEMUTimer *icount_rt_timer;
>      QEMUTimer *icount_vm_timer;
>      QEMUTimer *icount_warp_timer;
> @@ -244,11 +250,15 @@ void cpu_update_icount(CPUState *cpu)
>      int64_t executed = cpu_get_icount_executed(cpu);
>      cpu->icount_budget -= executed;
> 
> -#ifdef CONFIG_ATOMIC64
> +#ifndef CONFIG_ATOMIC64
> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
> +                       &timers_state.vm_clock_lock);
> +#endif
>      atomic_set__nocheck(&timers_state.qemu_icount,
>                          timers_state.qemu_icount + executed);
> -#else /* FIXME: we need 64bit atomics to do this safely */
> -    timers_state.qemu_icount += executed;
> +#ifndef CONFIG_ATOMIC64
> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
> +                         &timers_state.vm_clock_lock);
>  #endif
>  }
> 
> @@ -369,14 +379,15 @@ int64_t cpu_get_clock(void)
>   */
>  void cpu_enable_ticks(void)
>  {
> -    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
> -    seqlock_write_begin(&timers_state.vm_clock_seqlock);
> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
> +                       &timers_state.vm_clock_lock);
>      if (!timers_state.cpu_ticks_enabled) {
>          timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
>          timers_state.cpu_clock_offset -= get_clock();
>          timers_state.cpu_ticks_enabled = 1;
>      }
> -    seqlock_write_end(&timers_state.vm_clock_seqlock);
> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
> +                       &timers_state.vm_clock_lock);
>  }
> 
>  /* disable cpu_get_ticks() : the clock is stopped. You must not call
> @@ -385,14 +396,15 @@ void cpu_enable_ticks(void)
>   */
>  void cpu_disable_ticks(void)
>  {
> -    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
> -    seqlock_write_begin(&timers_state.vm_clock_seqlock);
> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
> +                       &timers_state.vm_clock_lock);
>      if (timers_state.cpu_ticks_enabled) {
>          timers_state.cpu_ticks_offset += cpu_get_host_ticks();
>          timers_state.cpu_clock_offset = cpu_get_clock_locked();
>          timers_state.cpu_ticks_enabled = 0;
>      }
> -    seqlock_write_end(&timers_state.vm_clock_seqlock);
> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
> +                         &timers_state.vm_clock_lock);
>  }
> 
>  /* Correlation between real and virtual time is always going to be
> @@ -415,7 +427,8 @@ static void icount_adjust(void)
>          return;
>      }
> 
> -    seqlock_write_begin(&timers_state.vm_clock_seqlock);
> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
> +                       &timers_state.vm_clock_lock);
>      cur_time = cpu_get_clock_locked();
>      cur_icount = cpu_get_icount_locked();
> 
> @@ -439,7 +452,8 @@ static void icount_adjust(void)
>      atomic_set__nocheck(&timers_state.qemu_icount_bias,
>                          cur_icount - (timers_state.qemu_icount
>                                        << timers_state.icount_time_shift));
> -    seqlock_write_end(&timers_state.vm_clock_seqlock);
> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
> +                         &timers_state.vm_clock_lock);
>  }
> 
>  static void icount_adjust_rt(void *opaque)
> @@ -480,7 +494,8 @@ static void icount_warp_rt(void)
>          return;
>      }
> 
> -    seqlock_write_begin(&timers_state.vm_clock_seqlock);
> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
> +                       &timers_state.vm_clock_lock);

After locking here,

>      if (runstate_is_running()) {
>          int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
>                                       cpu_get_clock_locked());

REPLAY_CLOCK can't request icount with cpu_get_icount_raw, because
it loops infinitely here:

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
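
(As a self-contained illustration, with hypothetical names rather than QEMU's
actual seqlock code: the sequence counter is odd while a writer is inside its
critical section, and here the thread polling it is the same thread that opened
the write section, so the counter never settles and the loop cannot exit.)

    /* hypothetical model of the livelock, not include/qemu/seqlock.h */
    #include <stdatomic.h>
    #include <stdbool.h>

    typedef struct { atomic_uint sequence; } SeqLock;

    static void write_begin(SeqLock *sl) { atomic_fetch_add(&sl->sequence, 1); } /* -> odd  */
    static void write_end(SeqLock *sl)   { atomic_fetch_add(&sl->sequence, 1); } /* -> even */

    static unsigned read_begin(SeqLock *sl)
    {
        return atomic_load(&sl->sequence) & ~1u;   /* mask the "writer active" bit */
    }

    static bool read_retry(SeqLock *sl, unsigned start)
    {
        /* true (i.e. "retry") whenever a write began or is still in progress */
        return atomic_load(&sl->sequence) != start;
    }

    /* with write_begin() already done on this thread, read_retry() can never
     * return false, so a read_begin()/read_retry() loop spins forever */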


> @@ -500,7 +515,8 @@ static void icount_warp_rt(void)
>                              timers_state.qemu_icount_bias + warp_delta);
>      }
>      timers_state.vm_clock_warp_start = -1;
> -    seqlock_write_end(&timers_state.vm_clock_seqlock);
> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
> +                       &timers_state.vm_clock_lock);
> 
>      if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
>          qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
> @@ -525,10 +541,12 @@ void qtest_clock_warp(int64_t dest)
>          int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
>          int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
> 
> -        seqlock_write_begin(&timers_state.vm_clock_seqlock);
> +        seqlock_write_lock(&timers_state.vm_clock_seqlock,
> +                           &timers_state.vm_clock_lock);
>          atomic_set__nocheck(&timers_state.qemu_icount_bias,
>                              timers_state.qemu_icount_bias + warp);
> -        seqlock_write_end(&timers_state.vm_clock_seqlock);
> +        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
> +                             &timers_state.vm_clock_lock);
> 
>          qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
>          timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
> @@ -595,10 +613,12 @@ void qemu_start_warp_timer(void)
>               * It is useful when we want a deterministic execution time,
>               * isolated from host latencies.
>               */
> -            seqlock_write_begin(&timers_state.vm_clock_seqlock);
> +            seqlock_write_lock(&timers_state.vm_clock_seqlock,
> +                               &timers_state.vm_clock_lock);
>              atomic_set__nocheck(&timers_state.qemu_icount_bias,
>                                  timers_state.qemu_icount_bias + deadline);
> -            seqlock_write_end(&timers_state.vm_clock_seqlock);
> +            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
> +                                 &timers_state.vm_clock_lock);
>              qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
>          } else {
>              /*
> @@ -609,12 +629,14 @@ void qemu_start_warp_timer(void)
>               * you will not be sending network packets continuously instead of
>               * every 100ms.
>               */
> -            seqlock_write_begin(&timers_state.vm_clock_seqlock);
> +            seqlock_write_lock(&timers_state.vm_clock_seqlock,
> +                               &timers_state.vm_clock_lock);
>              if (timers_state.vm_clock_warp_start == -1
>                  || timers_state.vm_clock_warp_start > clock) {
>                  timers_state.vm_clock_warp_start = clock;
>              }
> -            seqlock_write_end(&timers_state.vm_clock_seqlock);
> +            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
> +                                 &timers_state.vm_clock_lock);
>              timer_mod_anticipate(timers_state.icount_warp_timer,
>                                   clock + deadline);
>          }

Pavel Dovgalyuk


Re: [Qemu-devel] [3/4] cpus: protect TimerState writes with a spinlock
Posted by Paolo Bonzini 7 years, 1 month ago
On 28/08/2018 09:23, Pavel Dovgalyuk wrote:
> Hi, Paolo!
> 
> Seems that this one breaks the record/replay.

What are the symptoms?

Paolo

>> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
>> In the next patch, we will need to write cpu_ticks_offset from any
>> thread, even outside the BQL.  Currently, it is protected by the BQL
>> just because cpu_enable_ticks and cpu_disable_ticks happen to hold it,
>> but the critical sections are well delimited and it's easy to remove
>> the BQL dependency.
>>
>> Add a spinlock that matches vm_clock_seqlock, and hold it when writing
>> to the TimerState.  This also lets us fix cpu_update_icount when 64-bit
>> atomics are not available.
>>
>> Fields of TimersState are reordered to avoid padding.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  cpus.c | 72 ++++++++++++++++++++++++++++++++++++++--------------------
>>  1 file changed, 47 insertions(+), 25 deletions(-)
>>
>> diff --git a/cpus.c b/cpus.c
>> index 680706aefa..63ddd4fd21 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -129,21 +129,27 @@ typedef struct TimersState {
>>      int64_t cpu_ticks_prev;
>>      int64_t cpu_ticks_offset;
>>
>> -    /* cpu_clock_offset can be read out of BQL, so protect it with
>> -     * this lock.
>> +    /* Protect fields that can be respectively read outside the
>> +     * BQL, and written from multiple threads.
>>       */
>>      QemuSeqLock vm_clock_seqlock;
>> -    int64_t cpu_clock_offset;
>> -    int32_t cpu_ticks_enabled;
>> +    QemuSpin vm_clock_lock;
>> +
>> +    int16_t cpu_ticks_enabled;
>>
>>      /* Conversion factor from emulated instructions to virtual clock ticks.  */
>> -    int icount_time_shift;
>> +    int16_t icount_time_shift;
>> +
>>      /* Compensate for varying guest execution speed.  */
>>      int64_t qemu_icount_bias;
>> +
>> +    int64_t vm_clock_warp_start;
>> +    int64_t cpu_clock_offset;
>> +
>>      /* Only written by TCG thread */
>>      int64_t qemu_icount;
>> +
>>      /* for adjusting icount */
>> -    int64_t vm_clock_warp_start;
>>      QEMUTimer *icount_rt_timer;
>>      QEMUTimer *icount_vm_timer;
>>      QEMUTimer *icount_warp_timer;
>> @@ -244,11 +250,15 @@ void cpu_update_icount(CPUState *cpu)
>>      int64_t executed = cpu_get_icount_executed(cpu);
>>      cpu->icount_budget -= executed;
>>
>> -#ifdef CONFIG_ATOMIC64
>> +#ifndef CONFIG_ATOMIC64
>> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
>> +                       &timers_state.vm_clock_lock);
>> +#endif
>>      atomic_set__nocheck(&timers_state.qemu_icount,
>>                          timers_state.qemu_icount + executed);
>> -#else /* FIXME: we need 64bit atomics to do this safely */
>> -    timers_state.qemu_icount += executed;
>> +#ifndef CONFIG_ATOMIC64
>> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>> +                         &timers_state.vm_clock_lock);
>>  #endif
>>  }
>>
>> @@ -369,14 +379,15 @@ int64_t cpu_get_clock(void)
>>   */
>>  void cpu_enable_ticks(void)
>>  {
>> -    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
>> -    seqlock_write_begin(&timers_state.vm_clock_seqlock);
>> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
>> +                       &timers_state.vm_clock_lock);
>>      if (!timers_state.cpu_ticks_enabled) {
>>          timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
>>          timers_state.cpu_clock_offset -= get_clock();
>>          timers_state.cpu_ticks_enabled = 1;
>>      }
>> -    seqlock_write_end(&timers_state.vm_clock_seqlock);
>> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>> +                       &timers_state.vm_clock_lock);
>>  }
>>
>>  /* disable cpu_get_ticks() : the clock is stopped. You must not call
>> @@ -385,14 +396,15 @@ void cpu_enable_ticks(void)
>>   */
>>  void cpu_disable_ticks(void)
>>  {
>> -    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
>> -    seqlock_write_begin(&timers_state.vm_clock_seqlock);
>> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
>> +                       &timers_state.vm_clock_lock);
>>      if (timers_state.cpu_ticks_enabled) {
>>          timers_state.cpu_ticks_offset += cpu_get_host_ticks();
>>          timers_state.cpu_clock_offset = cpu_get_clock_locked();
>>          timers_state.cpu_ticks_enabled = 0;
>>      }
>> -    seqlock_write_end(&timers_state.vm_clock_seqlock);
>> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>> +                         &timers_state.vm_clock_lock);
>>  }
>>
>>  /* Correlation between real and virtual time is always going to be
>> @@ -415,7 +427,8 @@ static void icount_adjust(void)
>>          return;
>>      }
>>
>> -    seqlock_write_begin(&timers_state.vm_clock_seqlock);
>> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
>> +                       &timers_state.vm_clock_lock);
>>      cur_time = cpu_get_clock_locked();
>>      cur_icount = cpu_get_icount_locked();
>>
>> @@ -439,7 +452,8 @@ static void icount_adjust(void)
>>      atomic_set__nocheck(&timers_state.qemu_icount_bias,
>>                          cur_icount - (timers_state.qemu_icount
>>                                        << timers_state.icount_time_shift));
>> -    seqlock_write_end(&timers_state.vm_clock_seqlock);
>> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>> +                         &timers_state.vm_clock_lock);
>>  }
>>
>>  static void icount_adjust_rt(void *opaque)
>> @@ -480,7 +494,8 @@ static void icount_warp_rt(void)
>>          return;
>>      }
>>
>> -    seqlock_write_begin(&timers_state.vm_clock_seqlock);
>> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
>> +                       &timers_state.vm_clock_lock);
> 
> After locking here,
> 
>>      if (runstate_is_running()) {
>>          int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
>>                                       cpu_get_clock_locked());
> 
> REPLAY_CLOCK can't request icount with cpu_get_icount_raw, because
> it loops infinitely here:
> 
>     do {
>         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
>         icount = cpu_get_icount_raw_locked();
>     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
> 
> 
>> @@ -500,7 +515,8 @@ static void icount_warp_rt(void)
>>                              timers_state.qemu_icount_bias + warp_delta);
>>      }
>>      timers_state.vm_clock_warp_start = -1;
>> -    seqlock_write_end(&timers_state.vm_clock_seqlock);
>> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>> +                       &timers_state.vm_clock_lock);
>>
>>      if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
>>          qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
>> @@ -525,10 +541,12 @@ void qtest_clock_warp(int64_t dest)
>>          int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
>>          int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
>>
>> -        seqlock_write_begin(&timers_state.vm_clock_seqlock);
>> +        seqlock_write_lock(&timers_state.vm_clock_seqlock,
>> +                           &timers_state.vm_clock_lock);
>>          atomic_set__nocheck(&timers_state.qemu_icount_bias,
>>                              timers_state.qemu_icount_bias + warp);
>> -        seqlock_write_end(&timers_state.vm_clock_seqlock);
>> +        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>> +                             &timers_state.vm_clock_lock);
>>
>>          qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
>>          timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
>> @@ -595,10 +613,12 @@ void qemu_start_warp_timer(void)
>>               * It is useful when we want a deterministic execution time,
>>               * isolated from host latencies.
>>               */
>> -            seqlock_write_begin(&timers_state.vm_clock_seqlock);
>> +            seqlock_write_lock(&timers_state.vm_clock_seqlock,
>> +                               &timers_state.vm_clock_lock);
>>              atomic_set__nocheck(&timers_state.qemu_icount_bias,
>>                                  timers_state.qemu_icount_bias + deadline);
>> -            seqlock_write_end(&timers_state.vm_clock_seqlock);
>> +            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>> +                                 &timers_state.vm_clock_lock);
>>              qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
>>          } else {
>>              /*
>> @@ -609,12 +629,14 @@ void qemu_start_warp_timer(void)
>>               * you will not be sending network packets continuously instead of
>>               * every 100ms.
>>               */
>> -            seqlock_write_begin(&timers_state.vm_clock_seqlock);
>> +            seqlock_write_lock(&timers_state.vm_clock_seqlock,
>> +                               &timers_state.vm_clock_lock);
>>              if (timers_state.vm_clock_warp_start == -1
>>                  || timers_state.vm_clock_warp_start > clock) {
>>                  timers_state.vm_clock_warp_start = clock;
>>              }
>> -            seqlock_write_end(&timers_state.vm_clock_seqlock);
>> +            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>> +                                 &timers_state.vm_clock_lock);
>>              timer_mod_anticipate(timers_state.icount_warp_timer,
>>                                   clock + deadline);
>>          }
> 
> Pavel Dovgalyuk
> 


Re: [Qemu-devel] [3/4] cpus: protect TimerState writes with a spinlock
Posted by Pavel Dovgalyuk 7 years, 1 month ago
> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> On 28/08/2018 09:23, Pavel Dovgalyuk wrote:
> > Hi, Paolo!
> >
> > Seems that this one breaks the record/replay.
> 
> What are the symptoms?

Please look below.

> >> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> >> In the next patch, we will need to write cpu_ticks_offset from any
> >> thread, even outside the BQL.  Currently, it is protected by the BQL
> >> just because cpu_enable_ticks and cpu_disable_ticks happen to hold it,
> >> but the critical sections are well delimited and it's easy to remove
> >> the BQL dependency.
> >>
> >> Add a spinlock that matches vm_clock_seqlock, and hold it when writing
> >> to the TimerState.  This also lets us fix cpu_update_icount when 64-bit
> >> atomics are not available.
> >>
> >> Fields of TimersState are reordered to avoid padding.
> >>
> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> >> ---
> >>  cpus.c | 72 ++++++++++++++++++++++++++++++++++++++--------------------
> >>  1 file changed, 47 insertions(+), 25 deletions(-)
> >>


Here is the description:

> >>  static void icount_adjust_rt(void *opaque)
> >> @@ -480,7 +494,8 @@ static void icount_warp_rt(void)
> >>          return;
> >>      }
> >>
> >> -    seqlock_write_begin(&timers_state.vm_clock_seqlock);
> >> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
> >> +                       &timers_state.vm_clock_lock);
> >
> > After locking here,
> >
> >>      if (runstate_is_running()) {
> >>          int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
> >>                                       cpu_get_clock_locked());
> >
> > REPLAY_CLOCK can't request icount with cpu_get_icount_raw, because
> > it loops infinitely here:
> >
> >     do {
> >         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
> >         icount = cpu_get_icount_raw_locked();
> >     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
> >
> >



Pavel Dovgalyuk


Re: [Qemu-devel] [3/4] cpus: protect TimerState writes with a spinlock
Posted by Paolo Bonzini 7 years, 1 month ago
On 10/09/2018 07:36, Pavel Dovgalyuk wrote:
> After locking here,
> 
>>      if (runstate_is_running()) {
>>          int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
>>                                       cpu_get_clock_locked());
> REPLAY_CLOCK can't request icount with cpu_get_icount_raw, because
> it loops infinitely here:
> 
>     do {
>         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
>         icount = cpu_get_icount_raw_locked();
>     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

Yeah, I meant to ask for the backtrace but I can see that the issue is in
replay_save_instructions().  Does this work?

diff --git a/cpus.c b/cpus.c
index 8ee6e5db93..f257a6ef12 100644
--- a/cpus.c
+++ b/cpus.c
@@ -502,8 +502,8 @@ static void icount_warp_rt(void)
     seqlock_write_lock(&timers_state.vm_clock_seqlock,
                        &timers_state.vm_clock_lock);
     if (runstate_is_running()) {
-        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
-                                     cpu_get_clock_locked());
+        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
+                                            cpu_get_clock_locked());
         int64_t warp_delta;
 
         warp_delta = clock - timers_state.vm_clock_warp_start;
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 3ced6bc231..bb8660b4e4 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -100,14 +100,20 @@ bool replay_has_interrupt(void);
 /* Processing clocks and other time sources */
 
 /*! Save the specified clock */
-int64_t replay_save_clock(ReplayClockKind kind, int64_t clock);
+int64_t replay_save_clock(ReplayClockKind kind, int64_t clock,
+                          int64_t raw_icount);
 /*! Read the specified clock from the log or return cached data */
 int64_t replay_read_clock(ReplayClockKind kind);
 /*! Saves or reads the clock depending on the current replay mode. */
 #define REPLAY_CLOCK(clock, value)                                      \
     (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock))       \
         : replay_mode == REPLAY_MODE_RECORD                             \
-            ? replay_save_clock((clock), (value))                       \
+            ? replay_save_clock((clock), (value), cpu_get_icount_raw()) \
+        : (value))
+#define REPLAY_CLOCK_LOCKED(clock, value)                               \
+    (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock))       \
+        : replay_mode == REPLAY_MODE_RECORD                             \
+            ? replay_save_clock((clock), (value), cpu_get_icount_raw_locked()) \
         : (value))
 
 /* Events */
diff --git a/replay/replay-internal.c b/replay/replay-internal.c
index b077cb5fd5..7be4c010d0 100644
--- a/replay/replay-internal.c
+++ b/replay/replay-internal.c
@@ -217,20 +217,25 @@ void replay_mutex_unlock(void)
     }
 }
 
+void replay_advance_current_step(uint64_t current_step)
+{
+    int diff = (int)(current_step - replay_state.current_step);
+
+    /* Time can only go forward */
+    assert(diff >= 0);
+
+    if (diff > 0) {
+        replay_put_event(EVENT_INSTRUCTION);
+        replay_put_dword(diff);
+        replay_state.current_step += diff;
+    }
+}
+
 /*! Saves cached instructions. */
 void replay_save_instructions(void)
 {
     if (replay_file && replay_mode == REPLAY_MODE_RECORD) {
         g_assert(replay_mutex_locked());
-        int diff = (int)(replay_get_current_step() - replay_state.current_step);
-
-        /* Time can only go forward */
-        assert(diff >= 0);
-
-        if (diff > 0) {
-            replay_put_event(EVENT_INSTRUCTION);
-            replay_put_dword(diff);
-            replay_state.current_step += diff;
-        }
+        replay_advance_current_step(replay_get_current_step());
     }
 }
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index ac4b27b674..4f82676209 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -122,6 +122,8 @@ void replay_finish_event(void);
     data_kind variable. */
 void replay_fetch_data_kind(void);
 
+/*! Advance replay_state.current_step to the specified value. */
+void replay_advance_current_step(uint64_t current_step);
 /*! Saves queued events (like instructions and sound). */
 void replay_save_instructions(void);
 
diff --git a/replay/replay-time.c b/replay/replay-time.c
index 6a7565ec8d..17caf35e74 100644
--- a/replay/replay-time.c
+++ b/replay/replay-time.c
@@ -15,13 +15,15 @@
 #include "replay-internal.h"
 #include "qemu/error-report.h"
 
-int64_t replay_save_clock(ReplayClockKind kind, int64_t clock)
+int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, int64_t raw_icount)
 {
-
     if (replay_file) {
         g_assert(replay_mutex_locked());
 
-        replay_save_instructions();
+        /* Due to the caller's locking requirements we get the icount from it instead
+         * of using replay_save_instructions().
+         */
+        replay_advance_current_step(raw_icount);
         replay_put_event(EVENT_CLOCK + kind);
         replay_put_qword(clock);
     }

Thanks,

Paolo

Re: [Qemu-devel] [3/4] cpus: protect TimerState writes with a spinlock
Posted by Pavel Dovgalyuk 7 years, 1 month ago
> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> On 10/09/2018 07:36, Pavel Dovgalyuk wrote:
> > After locking here,
> >
> >>      if (runstate_is_running()) {
> >>          int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
> >>                                       cpu_get_clock_locked());
> > REPLAY_CLOCK can't request icount with cpu_get_icount_raw, because
> > it loops infinitely here:
> >
> >     do {
> >         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
> >         icount = cpu_get_icount_raw_locked();
> >     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
> 
> Yeah, I meant to ask for the backtrace but I can see that the issue is in
> replay_save_instructions().  Does this work?

Thanks, that works. Here is the updated diff (stubs were added).
Will you apply it?


Pavel Dovgalyuk


diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 3ced6bc..bb8660b 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -100,14 +100,20 @@ bool replay_has_interrupt(void);
 /* Processing clocks and other time sources */
 
 /*! Save the specified clock */
-int64_t replay_save_clock(ReplayClockKind kind, int64_t clock);
+int64_t replay_save_clock(ReplayClockKind kind, int64_t clock,
+                          int64_t raw_icount);
 /*! Read the specified clock from the log or return cached data */
 int64_t replay_read_clock(ReplayClockKind kind);
 /*! Saves or reads the clock depending on the current replay mode. */
 #define REPLAY_CLOCK(clock, value)                                      \
     (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock))       \
         : replay_mode == REPLAY_MODE_RECORD                             \
-            ? replay_save_clock((clock), (value))                       \
+            ? replay_save_clock((clock), (value), cpu_get_icount_raw()) \
+        : (value))
+#define REPLAY_CLOCK_LOCKED(clock, value)                               \
+    (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock))       \
+        : replay_mode == REPLAY_MODE_RECORD                             \
+            ? replay_save_clock((clock), (value), cpu_get_icount_raw_locked()) 
         : (value))
 
 /* Events */
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index ac4b27b..4f82676 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -122,6 +122,8 @@ void replay_finish_event(void);
     data_kind variable. */
 void replay_fetch_data_kind(void);
 
+/*! Advance replay_state.current_step to the specified value. */
+void replay_advance_current_step(uint64_t current_step);
 /*! Saves queued events (like instructions and sound). */
 void replay_save_instructions(void);
 
diff --git a/cpus.c b/cpus.c
index 8ee6e5d..f257a6e 100644
--- a/cpus.c
+++ b/cpus.c
@@ -502,8 +502,8 @@ static void icount_warp_rt(void)
     seqlock_write_lock(&timers_state.vm_clock_seqlock,
                        &timers_state.vm_clock_lock);
     if (runstate_is_running()) {
-        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
-                                     cpu_get_clock_locked());
+        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
+                                            cpu_get_clock_locked());
         int64_t warp_delta;
 
         warp_delta = clock - timers_state.vm_clock_warp_start;
diff --git a/replay/replay-internal.c b/replay/replay-internal.c
index b077cb5..7be4c01 100644
--- a/replay/replay-internal.c
+++ b/replay/replay-internal.c
@@ -217,20 +217,25 @@ void replay_mutex_unlock(void)
     }
 }
 
+void replay_advance_current_step(uint64_t current_step)
+{
+    int diff = (int)(current_step - replay_state.current_step);
+
+    /* Time can only go forward */
+    assert(diff >= 0);
+
+    if (diff > 0) {
+        replay_put_event(EVENT_INSTRUCTION);
+        replay_put_dword(diff);
+        replay_state.current_step += diff;
+    }
+}
+
 /*! Saves cached instructions. */
 void replay_save_instructions(void)
 {
     if (replay_file && replay_mode == REPLAY_MODE_RECORD) {
         g_assert(replay_mutex_locked());
-        int diff = (int)(replay_get_current_step() - replay_state.current_step)
-
-        /* Time can only go forward */
-        assert(diff >= 0);
-
-        if (diff > 0) {
-            replay_put_event(EVENT_INSTRUCTION);
-            replay_put_dword(diff);
-            replay_state.current_step += diff;
-        }
+        replay_advance_current_step(replay_get_current_step());
     }
 }
diff --git a/replay/replay-time.c b/replay/replay-time.c
index 6a7565e..17caf35 100644
--- a/replay/replay-time.c
+++ b/replay/replay-time.c
@@ -15,13 +15,15 @@
 #include "replay-internal.h"
 #include "qemu/error-report.h"
 
-int64_t replay_save_clock(ReplayClockKind kind, int64_t clock)
+int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, int64_t raw_icou
 {
-
     if (replay_file) {
         g_assert(replay_mutex_locked());
 
-        replay_save_instructions();
+        /* Due to the caller's locking requirements we get the icount from it i
+         * of using replay_save_instructions().
+         */
+        replay_advance_current_step(raw_icount);
         replay_put_event(EVENT_CLOCK + kind);
         replay_put_qword(clock);
     }
diff --git a/stubs/cpu-get-icount.c b/stubs/cpu-get-icount.c
index 0b7239d..35f0c1e 100644
--- a/stubs/cpu-get-icount.c
+++ b/stubs/cpu-get-icount.c
@@ -11,6 +11,11 @@ int64_t cpu_get_icount(void)
     abort();
 }
 
+int64_t cpu_get_icount_raw(void)
+{
+    abort();
+}
+
 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
 {
     qemu_notify_event();
diff --git a/stubs/replay.c b/stubs/replay.c
index 04279ab..4ac6078 100644
--- a/stubs/replay.c
+++ b/stubs/replay.c
@@ -4,7 +4,7 @@
 
 ReplayMode replay_mode;
 
-int64_t replay_save_clock(unsigned int kind, int64_t clock)
+int64_t replay_save_clock(unsigned int kind, int64_t clock, int64_t raw_icount)
 {
     abort();
     return 0;



Re: [Qemu-devel] [3/4] cpus: protect TimerState writes with a spinlock
Posted by Paolo Bonzini 7 years, 1 month ago
On 11/09/2018 08:00, Pavel Dovgalyuk wrote:
> Thanks, that works. Here is the updated diff (stubs were added).
> Will you apply it?

Yes, thanks for the quick test!

Paolo

Re: [Qemu-devel] [3/4] cpus: protect TimerState writes with a spinlock
Posted by Pavel Dovgalyuk 7 years ago
Paolo,

> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> On 11/09/2018 08:00, Pavel Dovgalyuk wrote:
> > Thanks, that works. Here is the updated diff (stubs were added).
> > Will you apply it?
> 
> Yes, thanks for the quick test!

Thanks for applying RR patches, but I think you forgot about this one.

Pavel Dovgalyuk


Re: [Qemu-devel] [3/4] cpus: protect TimerState writes with a spinlock
Posted by Paolo Bonzini 7 years ago
On 08/10/2018 09:09, Pavel Dovgalyuk wrote:
> Paolo,
> 
>> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
>> On 11/09/2018 08:00, Pavel Dovgalyuk wrote:
>>> Thanks, that works. Here is the updated diff (stubs were added).
>>> Will you apply it?
>>
>> Yes, thanks for the quick test!
> 
> Thanks for applying RR patches, but I think you forgot about this one.
> 
> Pavel Dovgalyuk
> 

Oops, yeah - it was because the patch you sent is a bit mangled
(truncated to 80 columns) but I've fixed it up and queued it now.

Paolo

Re: [Qemu-devel] [PATCH 3/4] cpus: protect TimerState writes with a spinlock
Posted by Emilio G. Cota 7 years, 2 months ago
On Mon, Aug 20, 2018 at 17:09:02 +0200, Paolo Bonzini wrote:
> In the next patch, we will need to write cpu_ticks_offset from any
> thread, even outside the BQL.  Currently, it is protected by the BQL
> just because cpu_enable_ticks and cpu_disable_ticks happen to hold it,
> but the critical sections are well delimited and it's easy to remove
> the BQL dependency.
> 
> Add a spinlock that matches vm_clock_seqlock, and hold it when writing
> to the TimerState.  This also lets us fix cpu_update_icount when 64-bit
> atomics are not available.
> 
> Fields of TimersState are reordered to avoid padding.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  cpus.c | 72 ++++++++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 47 insertions(+), 25 deletions(-)
> 
> diff --git a/cpus.c b/cpus.c
> index 680706aefa..63ddd4fd21 100644
> --- a/cpus.c
> +++ b/cpus.c
(snip)
> @@ -244,11 +250,15 @@ void cpu_update_icount(CPUState *cpu)
>      int64_t executed = cpu_get_icount_executed(cpu);
>      cpu->icount_budget -= executed;
>  
> -#ifdef CONFIG_ATOMIC64
> +#ifndef CONFIG_ATOMIC64
> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
> +                       &timers_state.vm_clock_lock);
> +#endif
>      atomic_set__nocheck(&timers_state.qemu_icount,
>                          timers_state.qemu_icount + executed);
> -#else /* FIXME: we need 64bit atomics to do this safely */
> -    timers_state.qemu_icount += executed;
> +#ifndef CONFIG_ATOMIC64
> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
> +                         &timers_state.vm_clock_lock);
>  #endif


I'm puzzled by this hunk. Why are we only adding the seqlock_write
if !CONFIG_ATOMIC64, if we always read .qemu_icount with seqlock_read?

Thanks,

		Emilio

Re: [Qemu-devel] [PATCH 3/4] cpus: protect TimerState writes with a spinlock
Posted by Paolo Bonzini 7 years, 1 month ago
On 01/09/2018 00:07, Emilio G. Cota wrote:
> On Mon, Aug 20, 2018 at 17:09:02 +0200, Paolo Bonzini wrote:
>> In the next patch, we will need to write cpu_ticks_offset from any
>> thread, even outside the BQL.  Currently, it is protected by the BQL
>> just because cpu_enable_ticks and cpu_disable_ticks happen to hold it,
>> but the critical sections are well delimited and it's easy to remove
>> the BQL dependency.
>>
>> Add a spinlock that matches vm_clock_seqlock, and hold it when writing
>> to the TimerState.  This also lets us fix cpu_update_icount when 64-bit
>> atomics are not available.
>>
>> Fields of TimersState are reordered to avoid padding.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  cpus.c | 72 ++++++++++++++++++++++++++++++++++++++--------------------
>>  1 file changed, 47 insertions(+), 25 deletions(-)
>>
>> diff --git a/cpus.c b/cpus.c
>> index 680706aefa..63ddd4fd21 100644
>> --- a/cpus.c
>> +++ b/cpus.c
> (snip)
>> @@ -244,11 +250,15 @@ void cpu_update_icount(CPUState *cpu)
>>      int64_t executed = cpu_get_icount_executed(cpu);
>>      cpu->icount_budget -= executed;
>>  
>> -#ifdef CONFIG_ATOMIC64
>> +#ifndef CONFIG_ATOMIC64
>> +    seqlock_write_lock(&timers_state.vm_clock_seqlock,
>> +                       &timers_state.vm_clock_lock);
>> +#endif
>>      atomic_set__nocheck(&timers_state.qemu_icount,
>>                          timers_state.qemu_icount + executed);
>> -#else /* FIXME: we need 64bit atomics to do this safely */
>> -    timers_state.qemu_icount += executed;
>> +#ifndef CONFIG_ATOMIC64
>> +    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>> +                         &timers_state.vm_clock_lock);
>>  #endif
> 
> 
> I'm puzzled by this hunk. Why are we only adding the seqlock_write
> if !CONFIG_ATOMIC64, if we always read .qemu_icount with seqlock_read?

Yeah, I missed that qemu_icount is read by icount_adjust and so it
indirectly affects qemu_icount_bias.  It needs the seqlock always.
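
(A minimal sketch of what "needs the seqlock always" could look like, assuming
the helpers from this patch; a real fix would also need a separate _locked
variant for callers that already hold the write side, which is omitted here:)

    void cpu_update_icount(CPUState *cpu)
    {
        int64_t executed = cpu_get_icount_executed(cpu);
        cpu->icount_budget -= executed;

        /* publish qemu_icount under the seqlock/spinlock pair so that
         * icount_adjust, which reads it while computing qemu_icount_bias,
         * always sees a consistent value -- with or without CONFIG_ATOMIC64 */
        seqlock_write_lock(&timers_state.vm_clock_seqlock,
                           &timers_state.vm_clock_lock);
        atomic_set__nocheck(&timers_state.qemu_icount,
                            timers_state.qemu_icount + executed);
        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                             &timers_state.vm_clock_lock);
    }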

Paolo