[PATCH] cpu-throttle: Fix vcpu missed throttle work

alloc.young@outlook.com posted 1 patch 7 months, 2 weeks ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/SA1PR11MB67600F56B3AE6348E7D6FEBFF5FBA@SA1PR11MB6760.namprd11.prod.outlook.com
Maintainers: Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Yanan Wang <wangyanan55@huawei.com>, Paolo Bonzini <pbonzini@redhat.com>
include/hw/core/cpu.h  |  5 ++++
softmmu/cpu-throttle.c | 58 +++++++++++++++++++++++++++++++++++++-----
2 files changed, 56 insertions(+), 7 deletions(-)
[PATCH] cpu-throttle: Fix vcpu missed throttle work
Posted by alloc.young@outlook.com 7 months, 2 weeks ago
From: alloc <yangcg26@midea.com>

During migrations, a vcpu may run longer than 10ms and not exit
on time. If the vcpu runs over 20ms, then it'll miss a throttle
kick and will run the whole tick. When this happens and the vcpu
dirties pages fast, the migration will take a long time or may even
be unable to auto-converge. To fix this issue, take the overrun
vcpu time into account and adjust the whole sleep time.

Signed-off-by: yangchunguang <yangcg26@midea.com>
---
 include/hw/core/cpu.h  |  5 ++++
 softmmu/cpu-throttle.c | 58 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 92a4234439..0b3cc3e81e 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -430,6 +430,11 @@ struct CPUState {
      */
     bool throttle_thread_scheduled;
 
+    /* Used to keep last cpu throttle tick
+     *
+     */
+    int64_t throttle_last_tick;
+
     /*
      * Sleep throttle_us_per_full microseconds once dirty ring is full
      * if dirty page rate limit is enabled.
diff --git a/softmmu/cpu-throttle.c b/softmmu/cpu-throttle.c
index d9bb30a223..bdec8dc954 100644
--- a/softmmu/cpu-throttle.c
+++ b/softmmu/cpu-throttle.c
@@ -36,22 +36,66 @@ static unsigned int throttle_percentage;
 #define CPU_THROTTLE_PCT_MIN 1
 #define CPU_THROTTLE_PCT_MAX 99
 #define CPU_THROTTLE_TIMESLICE_NS 10000000
+#define CPU_THROTTLE_RUN_MIN_NS (CPU_THROTTLE_TIMESLICE_NS / 100)
 
 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 {
     double pct;
     double throttle_ratio;
-    int64_t sleeptime_ns, endtime_ns;
+    int64_t sleeptime_ns, endtime_ns, now, overrun_ns;
 
     if (!cpu_throttle_get_percentage()) {
         return;
     }
 
+    now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     pct = (double)cpu_throttle_get_percentage() / 100;
     throttle_ratio = pct / (1 - pct);
-    /* Add 1ns to fix double's rounding error (like 0.9999999...) */
-    sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
-    endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
+    overrun_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) - cpu->throttle_last_tick;
+    /* If vcpu runs longer than 20ms, then the vcpu will miss next throttle tick and
+    *  will run almost the full tick frame. When this happens and vcpu runs fast dirty
+    *  pages, migration may take long time or can't converge at all.
+    *
+    *  Example of guest run longer than 30ms when cpu throttle is 99%
+    *
+    *  guest run(x) throttle tick(*) guest sleep(+)
+    *
+    *            +++++...+++++x xx+++++...++++++xxxxx...xxxxxx          vcpu
+    *
+    *  ----------*----...------*------...-----*------...----*---------- timeframe
+    *
+    */
+    if (overrun_ns > (CPU_THROTTLE_TIMESLICE_NS - CPU_THROTTLE_RUN_MIN_NS)) {
+        int64_t timeframe = CPU_THROTTLE_TIMESLICE_NS / (1 - pct) + 1;
+        int64_t new_ns = overrun_ns / (1 - pct) + 1;
+        int frames;
+        int64_t adj, remainder;
+
+        frames = overrun_ns / CPU_THROTTLE_TIMESLICE_NS;
+        sleeptime_ns = overrun_ns * throttle_ratio + 1;
+        remainder = new_ns - frames * timeframe;
+        if (remainder > 0) {
+            int64_t left_ns = timeframe - remainder;
+            int64_t left_run = (1 - pct) * left_ns;
+
+            adj = left_run < CPU_THROTTLE_RUN_MIN_NS ? CPU_THROTTLE_RUN_MIN_NS - left_run : 0;
+            sleeptime_ns += left_ns * pct;
+        } else
+            adj = CPU_THROTTLE_RUN_MIN_NS;
+
+        /* Limit max vcpu sleep time to avoid guest hang,
+         * max sleep time is 10s when cpu throttle is 99%
+         */
+        if (sleeptime_ns > 10 * timeframe) {
+            adj = remainder + CPU_THROTTLE_RUN_MIN_NS;
+            sleeptime_ns = 10 * timeframe;
+        }
+        sleeptime_ns -=  adj;
+    } else
+        /* Add 1ns to fix double's rounding error (like 0.9999999...) */
+        sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
+
+    endtime_ns = now + sleeptime_ns;
     while (sleeptime_ns > 0 && !cpu->stop) {
         if (sleeptime_ns > SCALE_MS) {
             qemu_cond_timedwait_iothread(cpu->halt_cond,
@@ -70,6 +114,7 @@ static void cpu_throttle_timer_tick(void *opaque)
 {
     CPUState *cpu;
     double pct;
+    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
 
     /* Stop the timer if needed */
     if (!cpu_throttle_get_percentage()) {
@@ -77,14 +122,13 @@ static void cpu_throttle_timer_tick(void *opaque)
     }
     CPU_FOREACH(cpu) {
         if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
+            cpu->throttle_last_tick = now;
             async_run_on_cpu(cpu, cpu_throttle_thread,
                              RUN_ON_CPU_NULL);
         }
     }
-
     pct = (double)cpu_throttle_get_percentage() / 100;
-    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
-                                   CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
+    timer_mod(throttle_timer, now + CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
 }
 
 void cpu_throttle_set(int new_throttle_pct)
-- 
2.39.3
ping: Re: [PATCH] cpu-throttle: Fix vcpu missed throttle work
Posted by alloc young 7 months, 2 weeks ago
Hi pbonzini:
     Please take some time to review this patch. It fixes an
auto-converge migration issue for guests that dirty memory
pages heavily. Any comments are welcome, thanks.


On 2023/9/18 11:29, alloc.young@outlook.com wrote:
> From: alloc <yangcg26@midea.com>
> 
> During migrations, vcpu may run longer than 10ms and not exit
> on time. If the vcpu runs over 20ms, then it'll miss a throttle
> kick and will run the whole tick. When this happens and vcpu
> dirties pages fast, the migration will take long time or event
> not enable to auto converge. To fix this issue, take overrun
> vcpu time into account and adjust the whole sleep time.
> 
> Signed-off-by: yangchunguang <yangcg26@midea.com>
> ---
>   include/hw/core/cpu.h  |  5 ++++
>   softmmu/cpu-throttle.c | 58 +++++++++++++++++++++++++++++++++++++-----
>   2 files changed, 56 insertions(+), 7 deletions(-)
> 
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 92a4234439..0b3cc3e81e 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -430,6 +430,11 @@ struct CPUState {
>        */
>       bool throttle_thread_scheduled;
>   
> +    /* Used to keep last cpu throttle tick
> +     *
> +     */
> +    int64_t throttle_last_tick;
> +
>       /*
>        * Sleep throttle_us_per_full microseconds once dirty ring is full
>        * if dirty page rate limit is enabled.
> diff --git a/softmmu/cpu-throttle.c b/softmmu/cpu-throttle.c
> index d9bb30a223..bdec8dc954 100644
> --- a/softmmu/cpu-throttle.c
> +++ b/softmmu/cpu-throttle.c
> @@ -36,22 +36,66 @@ static unsigned int throttle_percentage;
>   #define CPU_THROTTLE_PCT_MIN 1
>   #define CPU_THROTTLE_PCT_MAX 99
>   #define CPU_THROTTLE_TIMESLICE_NS 10000000
> +#define CPU_THROTTLE_RUN_MIN_NS (CPU_THROTTLE_TIMESLICE_NS / 100)
>   
>   static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
>   {
>       double pct;
>       double throttle_ratio;
> -    int64_t sleeptime_ns, endtime_ns;
> +    int64_t sleeptime_ns, endtime_ns, now, overrun_ns;
>   
>       if (!cpu_throttle_get_percentage()) {
>           return;
>       }
>   
> +    now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>       pct = (double)cpu_throttle_get_percentage() / 100;
>       throttle_ratio = pct / (1 - pct);
> -    /* Add 1ns to fix double's rounding error (like 0.9999999...) */
> -    sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
> -    endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
> +    overrun_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) - cpu->throttle_last_tick;
> +    /* If vcpu runs longer than 20ms, then the vcpu will miss next throttle tick and
> +    *  will run almost the full tick frame. When this happens and vcpu runs fast dirty
> +    *  pages, migration may take long time or can't converge at all.
> +    *
> +    *  Example of guest run longer than 30ms when cpu throttle is 99%
> +    *
> +    *  guest run(x) throttle tick(*) guest sleep(+)
> +    *
> +    *            +++++...+++++x xx+++++...++++++xxxxx...xxxxxx          vcpu
> +    *
> +    *  ----------*----...------*------...-----*------...----*---------- timeframe
> +    *
> +    */
> +    if (overrun_ns > (CPU_THROTTLE_TIMESLICE_NS - CPU_THROTTLE_RUN_MIN_NS)) {
> +        int64_t timeframe = CPU_THROTTLE_TIMESLICE_NS / (1 - pct) + 1;
> +        int64_t new_ns = overrun_ns / (1 - pct) + 1;
> +        int frames;
> +        int64_t adj, remainder;
> +
> +        frames = overrun_ns / CPU_THROTTLE_TIMESLICE_NS;
> +        sleeptime_ns = overrun_ns * throttle_ratio + 1;
> +        remainder = new_ns - frames * timeframe;
> +        if (remainder > 0) {
> +            int64_t left_ns = timeframe - remainder;
> +            int64_t left_run = (1 - pct) * left_ns;
> +
> +            adj = left_run < CPU_THROTTLE_RUN_MIN_NS ? CPU_THROTTLE_RUN_MIN_NS - left_run : 0;
> +            sleeptime_ns += left_ns * pct;
> +        } else
> +            adj = CPU_THROTTLE_RUN_MIN_NS;
> +
> +        /* Limit max vcpu sleep time to avoid guest hang,
> +         * max sleep time is 10s when cpu throttle is 99%
> +         */
> +        if (sleeptime_ns > 10 * timeframe) {
> +            adj = remainder + CPU_THROTTLE_RUN_MIN_NS;
> +            sleeptime_ns = 10 * timeframe;
> +        }
> +        sleeptime_ns -=  adj;
> +    } else
> +        /* Add 1ns to fix double's rounding error (like 0.9999999...) */
> +        sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
> +
> +    endtime_ns = now + sleeptime_ns;
>       while (sleeptime_ns > 0 && !cpu->stop) {
>           if (sleeptime_ns > SCALE_MS) {
>               qemu_cond_timedwait_iothread(cpu->halt_cond,
> @@ -70,6 +114,7 @@ static void cpu_throttle_timer_tick(void *opaque)
>   {
>       CPUState *cpu;
>       double pct;
> +    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
>   
>       /* Stop the timer if needed */
>       if (!cpu_throttle_get_percentage()) {
> @@ -77,14 +122,13 @@ static void cpu_throttle_timer_tick(void *opaque)
>       }
>       CPU_FOREACH(cpu) {
>           if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
> +            cpu->throttle_last_tick = now;
>               async_run_on_cpu(cpu, cpu_throttle_thread,
>                                RUN_ON_CPU_NULL);
>           }
>       }
> -
>       pct = (double)cpu_throttle_get_percentage() / 100;
> -    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
> -                                   CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
> +    timer_mod(throttle_timer, now + CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
>   }
>   
>   void cpu_throttle_set(int new_throttle_pct)