[PATCH v3] migration/dirtyrate: make sample page count configurable

huangy81@chinatelecom.cn posted 1 patch 2 years, 11 months ago
Test checkpatch passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/1fc52dd5abfa7590f516c3e97878ee263bff105a.1620742417.git.huangy81@chinatelecom.cn
migration/dirtyrate.c | 31 +++++++++++++++++++++++++++----
migration/dirtyrate.h |  8 +++++++-
qapi/migration.json   | 13 ++++++++++---
3 files changed, 44 insertions(+), 8 deletions(-)
[PATCH v3] migration/dirtyrate: make sample page count configurable
Posted by huangy81@chinatelecom.cn 2 years, 11 months ago
From: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>

introduce optional sample-pages argument in calc-dirty-rate,
making sample page count per GB configurable so that more
accurate dirtyrate can be calculated.

Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
---
 migration/dirtyrate.c | 31 +++++++++++++++++++++++++++----
 migration/dirtyrate.h |  8 +++++++-
 qapi/migration.json   | 13 ++++++++++---
 3 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index ccb9814..2ee3890 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -48,6 +48,12 @@ static bool is_sample_period_valid(int64_t sec)
     return true;
 }
 
+static bool is_sample_pages_valid(int64_t pages)
+{
+    return pages >= MIN_SAMPLE_PAGE_COUNT &&
+           pages <= MAX_SAMPLE_PAGE_COUNT;
+}
+
 static int dirtyrate_set_state(int *state, int old_state, int new_state)
 {
     assert(new_state < DIRTY_RATE_STATUS__MAX);
@@ -72,13 +78,15 @@ static struct DirtyRateInfo *query_dirty_rate_info(void)
     info->status = CalculatingState;
     info->start_time = DirtyStat.start_time;
     info->calc_time = DirtyStat.calc_time;
+    info->sample_pages = DirtyStat.sample_pages;
 
     trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
 
     return info;
 }
 
-static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
+static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time,
+                                uint64_t sample_pages)
 {
     DirtyStat.total_dirty_samples = 0;
     DirtyStat.total_sample_count = 0;
@@ -86,6 +94,7 @@ static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
     DirtyStat.dirty_rate = -1;
     DirtyStat.start_time = start_time;
     DirtyStat.calc_time = calc_time;
+    DirtyStat.sample_pages = sample_pages;
 }
 
 static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
@@ -361,6 +370,7 @@ void *get_dirtyrate_thread(void *arg)
     int ret;
     int64_t start_time;
     int64_t calc_time;
+    uint64_t sample_pages;
 
     ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
                               DIRTY_RATE_STATUS_MEASURING);
@@ -371,7 +381,8 @@ void *get_dirtyrate_thread(void *arg)
 
     start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
     calc_time = config.sample_period_seconds;
-    init_dirtyrate_stat(start_time, calc_time);
+    sample_pages = config.sample_pages_per_gigabytes;
+    init_dirtyrate_stat(start_time, calc_time, sample_pages);
 
     calculate_dirtyrate(config);
 
@@ -383,7 +394,8 @@ void *get_dirtyrate_thread(void *arg)
     return NULL;
 }
 
-void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
+void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
+                         int64_t sample_pages, Error **errp)
 {
     static struct DirtyRateConfig config;
     QemuThread thread;
@@ -404,6 +416,17 @@ void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
         return;
     }
 
+    if (has_sample_pages) {
+        if (!is_sample_pages_valid(sample_pages)) {
+            error_setg(errp, "sample-pages is out of range[%d, %d].",
+                            MIN_SAMPLE_PAGE_COUNT,
+                            MAX_SAMPLE_PAGE_COUNT);
+            return;
+        }
+    } else {
+        sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
+    }
+
     /*
      * Init calculation state as unstarted.
      */
@@ -415,7 +438,7 @@ void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
     }
 
     config.sample_period_seconds = calc_time;
-    config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
+    config.sample_pages_per_gigabytes = sample_pages;
     qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
                        (void *)&config, QEMU_THREAD_DETACHED);
 }
diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
index 6ec4295..e1fd290 100644
--- a/migration/dirtyrate.h
+++ b/migration/dirtyrate.h
@@ -15,7 +15,6 @@
 
 /*
  * Sample 512 pages per GB as default.
- * TODO: Make it configurable.
  */
 #define DIRTYRATE_DEFAULT_SAMPLE_PAGES            512
 
@@ -35,6 +34,12 @@
 #define MIN_FETCH_DIRTYRATE_TIME_SEC              1
 #define MAX_FETCH_DIRTYRATE_TIME_SEC              60
 
+/*
+ * Take 1/16 pages in 1G as the maxmum sample page count
+ */
+#define MIN_SAMPLE_PAGE_COUNT                     128
+#define MAX_SAMPLE_PAGE_COUNT                     16384
+
 struct DirtyRateConfig {
     uint64_t sample_pages_per_gigabytes; /* sample pages per GB */
     int64_t sample_period_seconds; /* time duration between two sampling */
@@ -63,6 +68,7 @@ struct DirtyRateStat {
     int64_t dirty_rate; /* dirty rate in MB/s */
     int64_t start_time; /* calculation start time in units of second */
     int64_t calc_time; /* time duration of two sampling in units of second */
+    uint64_t sample_pages; /* sample pages per GB */
 };
 
 void *get_dirtyrate_thread(void *arg);
diff --git a/qapi/migration.json b/qapi/migration.json
index 0b17cce..890e745 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1746,6 +1746,9 @@
 #
 # @calc-time: time in units of second for sample dirty pages
 #
+# @sample-pages: page count per GB for sample dirty pages
+#                the default value is 512 (since 6.1)
+#
 # Since: 5.2
 #
 ##
@@ -1753,7 +1756,8 @@
   'data': {'*dirty-rate': 'int64',
            'status': 'DirtyRateStatus',
            'start-time': 'int64',
-           'calc-time': 'int64'} }
+           'calc-time': 'int64',
+           'sample-pages': 'uint64'} }
 
 ##
 # @calc-dirty-rate:
@@ -1762,13 +1766,16 @@
 #
 # @calc-time: time in units of second for sample dirty pages
 #
+# @sample-pages: page count per GB for sample dirty pages
+#                the default value is 512 (since 6.1)
+#
 # Since: 5.2
 #
 # Example:
-#   {"command": "calc-dirty-rate", "data": {"calc-time": 1} }
+#   {"command": "calc-dirty-rate", "data": {"calc-time": 1, 'sample-pages': 512} }
 #
 ##
-{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} }
+{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64', '*sample-pages': 'int'} }
 
 ##
 # @query-dirty-rate:
-- 
1.8.3.1


Re: [PATCH v3] migration/dirtyrate: make sample page count configurable
Posted by Hyman Huang 2 years, 11 months ago
Ping

though dirtyrate by sampling page may kind of be inaccurate,
it still valuable for those who run qemu on non-x86 or kernel
which does not support dirty ring, this patch is necessary i
think, what would you think of it ?

在 2021/5/11 22:21, huangy81@chinatelecom.cn 写道:
> From: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
> 
> introduce optional sample-pages argument in calc-dirty-rate,
> making sample page count per GB configurable so that more
> accurate dirtyrate can be calculated.
> 
> Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
> ---
>   migration/dirtyrate.c | 31 +++++++++++++++++++++++++++----
>   migration/dirtyrate.h |  8 +++++++-
>   qapi/migration.json   | 13 ++++++++++---
>   3 files changed, 44 insertions(+), 8 deletions(-)
> 
> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> index ccb9814..2ee3890 100644
> --- a/migration/dirtyrate.c
> +++ b/migration/dirtyrate.c
> @@ -48,6 +48,12 @@ static bool is_sample_period_valid(int64_t sec)
>       return true;
>   }
>   
> +static bool is_sample_pages_valid(int64_t pages)
> +{
> +    return pages >= MIN_SAMPLE_PAGE_COUNT &&
> +           pages <= MAX_SAMPLE_PAGE_COUNT;
> +}
> +
>   static int dirtyrate_set_state(int *state, int old_state, int new_state)
>   {
>       assert(new_state < DIRTY_RATE_STATUS__MAX);
> @@ -72,13 +78,15 @@ static struct DirtyRateInfo *query_dirty_rate_info(void)
>       info->status = CalculatingState;
>       info->start_time = DirtyStat.start_time;
>       info->calc_time = DirtyStat.calc_time;
> +    info->sample_pages = DirtyStat.sample_pages;
>   
>       trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
>   
>       return info;
>   }
>   
> -static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
> +static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time,
> +                                uint64_t sample_pages)
>   {
>       DirtyStat.total_dirty_samples = 0;
>       DirtyStat.total_sample_count = 0;
> @@ -86,6 +94,7 @@ static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
>       DirtyStat.dirty_rate = -1;
>       DirtyStat.start_time = start_time;
>       DirtyStat.calc_time = calc_time;
> +    DirtyStat.sample_pages = sample_pages;
>   }
>   
>   static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
> @@ -361,6 +370,7 @@ void *get_dirtyrate_thread(void *arg)
>       int ret;
>       int64_t start_time;
>       int64_t calc_time;
> +    uint64_t sample_pages;
>   
>       ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
>                                 DIRTY_RATE_STATUS_MEASURING);
> @@ -371,7 +381,8 @@ void *get_dirtyrate_thread(void *arg)
>   
>       start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
>       calc_time = config.sample_period_seconds;
> -    init_dirtyrate_stat(start_time, calc_time);
> +    sample_pages = config.sample_pages_per_gigabytes;
> +    init_dirtyrate_stat(start_time, calc_time, sample_pages);
>   
>       calculate_dirtyrate(config);
>   
> @@ -383,7 +394,8 @@ void *get_dirtyrate_thread(void *arg)
>       return NULL;
>   }
>   
> -void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
> +void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
> +                         int64_t sample_pages, Error **errp)
>   {
>       static struct DirtyRateConfig config;
>       QemuThread thread;
> @@ -404,6 +416,17 @@ void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
>           return;
>       }
>   
> +    if (has_sample_pages) {
> +        if (!is_sample_pages_valid(sample_pages)) {
> +            error_setg(errp, "sample-pages is out of range[%d, %d].",
> +                            MIN_SAMPLE_PAGE_COUNT,
> +                            MAX_SAMPLE_PAGE_COUNT);
> +            return;
> +        }
> +    } else {
> +        sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
> +    }
> +
>       /*
>        * Init calculation state as unstarted.
>        */
> @@ -415,7 +438,7 @@ void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
>       }
>   
>       config.sample_period_seconds = calc_time;
> -    config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
> +    config.sample_pages_per_gigabytes = sample_pages;
>       qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
>                          (void *)&config, QEMU_THREAD_DETACHED);
>   }
> diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
> index 6ec4295..e1fd290 100644
> --- a/migration/dirtyrate.h
> +++ b/migration/dirtyrate.h
> @@ -15,7 +15,6 @@
>   
>   /*
>    * Sample 512 pages per GB as default.
> - * TODO: Make it configurable.
>    */
>   #define DIRTYRATE_DEFAULT_SAMPLE_PAGES            512
>   
> @@ -35,6 +34,12 @@
>   #define MIN_FETCH_DIRTYRATE_TIME_SEC              1
>   #define MAX_FETCH_DIRTYRATE_TIME_SEC              60
>   
> +/*
> + * Take 1/16 pages in 1G as the maxmum sample page count
> + */
> +#define MIN_SAMPLE_PAGE_COUNT                     128
> +#define MAX_SAMPLE_PAGE_COUNT                     16384
> +
>   struct DirtyRateConfig {
>       uint64_t sample_pages_per_gigabytes; /* sample pages per GB */
>       int64_t sample_period_seconds; /* time duration between two sampling */
> @@ -63,6 +68,7 @@ struct DirtyRateStat {
>       int64_t dirty_rate; /* dirty rate in MB/s */
>       int64_t start_time; /* calculation start time in units of second */
>       int64_t calc_time; /* time duration of two sampling in units of second */
> +    uint64_t sample_pages; /* sample pages per GB */
>   };
>   
>   void *get_dirtyrate_thread(void *arg);
> diff --git a/qapi/migration.json b/qapi/migration.json
> index 0b17cce..890e745 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -1746,6 +1746,9 @@
>   #
>   # @calc-time: time in units of second for sample dirty pages
>   #
> +# @sample-pages: page count per GB for sample dirty pages
> +#                the default value is 512 (since 6.1)
> +#
>   # Since: 5.2
>   #
>   ##
> @@ -1753,7 +1756,8 @@
>     'data': {'*dirty-rate': 'int64',
>              'status': 'DirtyRateStatus',
>              'start-time': 'int64',
> -           'calc-time': 'int64'} }
> +           'calc-time': 'int64',
> +           'sample-pages': 'uint64'} }
>   
>   ##
>   # @calc-dirty-rate:
> @@ -1762,13 +1766,16 @@
>   #
>   # @calc-time: time in units of second for sample dirty pages
>   #
> +# @sample-pages: page count per GB for sample dirty pages
> +#                the default value is 512 (since 6.1)
> +#
>   # Since: 5.2
>   #
>   # Example:
> -#   {"command": "calc-dirty-rate", "data": {"calc-time": 1} }
> +#   {"command": "calc-dirty-rate", "data": {"calc-time": 1, 'sample-pages': 512} }
>   #
>   ##
> -{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} }
> +{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64', '*sample-pages': 'int'} }
>   
>   ##
>   # @query-dirty-rate:
> 

-- 
Best regard

Hyman Huang(黄勇)

Re: [PATCH v3] migration/dirtyrate: make sample page count configurable
Posted by Peter Xu 2 years, 11 months ago
On Tue, Jun 01, 2021 at 08:09:34PM +0800, Hyman Huang wrote:
> Ping
> 
> though dirtyrate by sampling page may kind of be inaccurate,
> it still valuable for those who run qemu on non-x86 or kernel
> which does not support dirty ring, this patch is necessary i
> think, what would you think of it ?

Yes I think this patch is okay:

Reviewed-by: Peter Xu <peterx@redhat.com>

Maybe I can pick it up and repost with the hmp cmds as they conflict.

But note that even with this sample_pages parameter, my test still gets this
with a 200MB/s workload:

(qemu) calc_dirty_rate 10 16384
...
(qemu) info dirty_rate
Dirty rate: 21 (MB/s)

I think it means it does not solve the memory locality issue, so it may only be
useful for workload that mostly randomly distributed across all the ram.
However since normally this is used to evaluate "whether this customer VM can
be migrated", it also means maybe the admin has no idea about what type of
workload the guest is running.  Depending on this info will wrongly migrate a
very busy VM as the admin thought it's low loaded.

So I think at last if we want to make this feature to real use, we may need to
depend on either dirty logging or dirty ring to report the real numbers, even
without migration started.

-- 
Peter Xu