From: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
abstract out dirty log change logic into function
global_dirty_log_change.
abstract out dirty page rate calculation logic via
dirty-ring into function vcpu_calculate_dirtyrate.
abstract out mathematical dirty page rate calculation
into do_calculate_dirtyrate, decouple it from DirtyStat.
rename set_sample_page_period to dirty_stat_wait, which
is well-understood and will be reused in dirtylimit.
handle cpu hotplug/unplug scenario during measurement of
dirty page rate.
export util functions outside migration.
Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
---
include/sysemu/dirtyrate.h | 29 ++++++
migration/dirtyrate.c | 222 +++++++++++++++++++++++++++++----------------
migration/dirtyrate.h | 7 +-
3 files changed, 173 insertions(+), 85 deletions(-)
create mode 100644 include/sysemu/dirtyrate.h
diff --git a/include/sysemu/dirtyrate.h b/include/sysemu/dirtyrate.h
new file mode 100644
index 0000000..cb6f02b
--- /dev/null
+++ b/include/sysemu/dirtyrate.h
@@ -0,0 +1,29 @@
+/*
+ * dirty page rate helper functions
+ *
+ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
+ *
+ * Authors:
+ * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_DIRTYRATE_H
+#define QEMU_DIRTYRATE_H
+
+typedef struct VcpuStat {
+ int nvcpu; /* number of vcpu */
+ DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */
+} VcpuStat;
+
+int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
+ int64_t init_time_ms,
+ VcpuStat *stat,
+ unsigned int flag,
+ bool one_shot);
+
+void global_dirty_log_change(unsigned int flag,
+ bool start);
+#endif
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index d65e744..6be6664 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -46,7 +46,7 @@ static struct DirtyRateStat DirtyStat;
static DirtyRateMeasureMode dirtyrate_mode =
DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
-static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
+static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
{
int64_t current_time;
@@ -60,6 +60,130 @@ static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
return msec;
}
+static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
+ CPUState *cpu, bool start)
+{
+ if (start) {
+ dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
+ } else {
+ dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
+ }
+}
+
+static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
+ int64_t calc_time_ms)
+{
+ uint64_t memory_size_MB;
+ uint64_t increased_dirty_pages =
+ dirty_pages.end_pages - dirty_pages.start_pages;
+
+ memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
+
+ return memory_size_MB * 1000 / calc_time_ms;
+}
+
+void global_dirty_log_change(unsigned int flag, bool start)
+{
+ qemu_mutex_lock_iothread();
+ if (start) {
+ memory_global_dirty_log_start(flag);
+ } else {
+ memory_global_dirty_log_stop(flag);
+ }
+ qemu_mutex_unlock_iothread();
+}
+
+/*
+ * global_dirty_log_sync
+ * 1. sync dirty log from kvm
+ * 2. stop dirty tracking if needed.
+ */
+static void global_dirty_log_sync(unsigned int flag, bool one_shot)
+{
+ qemu_mutex_lock_iothread();
+ memory_global_dirty_log_sync();
+ if (one_shot) {
+ memory_global_dirty_log_stop(flag);
+ }
+ qemu_mutex_unlock_iothread();
+}
+
+static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
+{
+ CPUState *cpu;
+ DirtyPageRecord *records;
+ int nvcpu = 0;
+
+ CPU_FOREACH(cpu) {
+ nvcpu++;
+ }
+
+ stat->nvcpu = nvcpu;
+ stat->rates = g_malloc0(sizeof(DirtyRateVcpu) * nvcpu);
+
+ records = g_malloc0(sizeof(DirtyPageRecord) * nvcpu);
+
+ return records;
+}
+
+static void vcpu_dirty_stat_collect(VcpuStat *stat,
+ DirtyPageRecord *records,
+ bool start)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ record_dirtypages(records, cpu, start);
+ }
+}
+
+int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
+ int64_t init_time_ms,
+ VcpuStat *stat,
+ unsigned int flag,
+ bool one_shot)
+{
+ DirtyPageRecord *records;
+ int64_t duration;
+ int64_t dirtyrate;
+ int i = 0;
+ unsigned int gen_id;
+
+retry:
+ cpu_list_lock();
+ gen_id = cpu_list_generation_id_get();
+ records = vcpu_dirty_stat_alloc(stat);
+ vcpu_dirty_stat_collect(stat, records, true);
+ cpu_list_unlock();
+
+ duration = dirty_stat_wait(calc_time_ms, init_time_ms);
+
+ global_dirty_log_sync(flag, one_shot);
+
+ cpu_list_lock();
+ if (gen_id != cpu_list_generation_id_get()) {
+ g_free(records);
+ g_free(stat->rates);
+ cpu_list_unlock();
+ goto retry;
+ }
+ vcpu_dirty_stat_collect(stat, records, false);
+ cpu_list_unlock();
+
+ for (i = 0; i < stat->nvcpu; i++) {
+ dirtyrate = do_calculate_dirtyrate(records[i], duration);
+
+ stat->rates[i].id = i;
+ stat->rates[i].dirty_rate = dirtyrate;
+
+ trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
+ }
+
+ g_free(records);
+
+ return duration;
+}
+
static bool is_sample_period_valid(int64_t sec)
{
if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC ||
@@ -396,44 +520,6 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
return true;
}
-static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
- CPUState *cpu, bool start)
-{
- if (start) {
- dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
- } else {
- dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
- }
-}
-
-static void dirtyrate_global_dirty_log_start(void)
-{
- qemu_mutex_lock_iothread();
- memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
- qemu_mutex_unlock_iothread();
-}
-
-static void dirtyrate_global_dirty_log_stop(void)
-{
- qemu_mutex_lock_iothread();
- memory_global_dirty_log_sync();
- memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
- qemu_mutex_unlock_iothread();
-}
-
-static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages)
-{
- uint64_t memory_size_MB;
- int64_t time_s;
- uint64_t increased_dirty_pages =
- dirty_pages.end_pages - dirty_pages.start_pages;
-
- memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
- time_s = DirtyStat.calc_time;
-
- return memory_size_MB / time_s;
-}
-
static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
bool start)
{
@@ -444,11 +530,6 @@ static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
}
}
-static void do_calculate_dirtyrate_bitmap(DirtyPageRecord dirty_pages)
-{
- DirtyStat.dirty_rate = do_calculate_dirtyrate_vcpu(dirty_pages);
-}
-
static inline void dirtyrate_manual_reset_protect(void)
{
RAMBlock *block = NULL;
@@ -492,71 +573,52 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
DirtyStat.start_time = start_time / 1000;
msec = config.sample_period_seconds * 1000;
- msec = set_sample_page_period(msec, start_time);
+ msec = dirty_stat_wait(msec, start_time);
DirtyStat.calc_time = msec / 1000;
/*
- * dirtyrate_global_dirty_log_stop do two things.
+ * do two things.
* 1. fetch dirty bitmap from kvm
* 2. stop dirty tracking
*/
- dirtyrate_global_dirty_log_stop();
+ global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
record_dirtypages_bitmap(&dirty_pages, false);
- do_calculate_dirtyrate_bitmap(dirty_pages);
+ DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages, msec);
}
static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
{
- CPUState *cpu;
- int64_t msec = 0;
int64_t start_time;
+ int64_t duration;
uint64_t dirtyrate = 0;
uint64_t dirtyrate_sum = 0;
- DirtyPageRecord *dirty_pages;
- int nvcpu = 0;
int i = 0;
- CPU_FOREACH(cpu) {
- nvcpu++;
- }
-
- dirty_pages = malloc(sizeof(*dirty_pages) * nvcpu);
-
- DirtyStat.dirty_ring.nvcpu = nvcpu;
- DirtyStat.dirty_ring.rates = malloc(sizeof(DirtyRateVcpu) * nvcpu);
-
- dirtyrate_global_dirty_log_start();
-
- CPU_FOREACH(cpu) {
- record_dirtypages(dirty_pages, cpu, true);
- }
+ /* start log sync */
+ global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
DirtyStat.start_time = start_time / 1000;
- msec = config.sample_period_seconds * 1000;
- msec = set_sample_page_period(msec, start_time);
- DirtyStat.calc_time = msec / 1000;
+ /* calculate vcpu dirtyrate */
+ duration = vcpu_calculate_dirtyrate(config.sample_period_seconds * 1000,
+ start_time,
+ &DirtyStat.dirty_ring,
+ GLOBAL_DIRTY_DIRTY_RATE,
+ true);
- dirtyrate_global_dirty_log_stop();
-
- CPU_FOREACH(cpu) {
- record_dirtypages(dirty_pages, cpu, false);
- }
+ DirtyStat.calc_time = duration / 1000;
+ /* calculate vm dirtyrate */
for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
- dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]);
- trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
-
- DirtyStat.dirty_ring.rates[i].id = i;
+ dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
dirtyrate_sum += dirtyrate;
}
DirtyStat.dirty_rate = dirtyrate_sum;
- free(dirty_pages);
}
static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
@@ -574,7 +636,7 @@ static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
rcu_read_unlock();
msec = config.sample_period_seconds * 1000;
- msec = set_sample_page_period(msec, initial_time);
+ msec = dirty_stat_wait(msec, initial_time);
DirtyStat.start_time = initial_time / 1000;
DirtyStat.calc_time = msec / 1000;
diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
index 69d4c5b..594a5c0 100644
--- a/migration/dirtyrate.h
+++ b/migration/dirtyrate.h
@@ -13,6 +13,8 @@
#ifndef QEMU_MIGRATION_DIRTYRATE_H
#define QEMU_MIGRATION_DIRTYRATE_H
+#include "sysemu/dirtyrate.h"
+
/*
* Sample 512 pages per GB as default.
*/
@@ -65,11 +67,6 @@ typedef struct SampleVMStat {
uint64_t total_block_mem_MB; /* size of total sampled pages in MB */
} SampleVMStat;
-typedef struct VcpuStat {
- int nvcpu; /* number of vcpu */
- DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */
-} VcpuStat;
-
/*
* Store calculation statistics for each measure.
*/
--
1.8.3.1
Mostly good, one trivial nit below:
On Fri, Feb 11, 2022 at 12:17:37AM +0800, huangy81@chinatelecom.cn wrote:
> +int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
> + int64_t init_time_ms,
> + VcpuStat *stat,
> + unsigned int flag,
> + bool one_shot)
> +{
> + DirtyPageRecord *records;
> + int64_t duration;
> + int64_t dirtyrate;
> + int i = 0;
> + unsigned int gen_id;
> +
> +retry:
> + cpu_list_lock();
> + gen_id = cpu_list_generation_id_get();
> + records = vcpu_dirty_stat_alloc(stat);
> + vcpu_dirty_stat_collect(stat, records, true);
> + cpu_list_unlock();
> +
> + duration = dirty_stat_wait(calc_time_ms, init_time_ms);
Is it a must to pass in init_time_ms rather than always sleep in
dirty_stat_wait()? Could we simply drop it?
--
Peter Xu
在 2022/2/14 15:57, Peter Xu 写道:
> Mostly good, one trivial nit below:
>
> On Fri, Feb 11, 2022 at 12:17:37AM +0800, huangy81@chinatelecom.cn wrote:
>> +int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
>> + int64_t init_time_ms,
>> + VcpuStat *stat,
>> + unsigned int flag,
>> + bool one_shot)
>> +{
>> + DirtyPageRecord *records;
>> + int64_t duration;
>> + int64_t dirtyrate;
>> + int i = 0;
>> + unsigned int gen_id;
>> +
>> +retry:
>> + cpu_list_lock();
>> + gen_id = cpu_list_generation_id_get();
>> + records = vcpu_dirty_stat_alloc(stat);
>> + vcpu_dirty_stat_collect(stat, records, true);
>> + cpu_list_unlock();
>> +
>> + duration = dirty_stat_wait(calc_time_ms, init_time_ms);
>
> Is it a must to pass in init_time_ms rather than always sleep in
> dirty_stat_wait()? Could we simply drop it?
>
Indeed, the parameter 'init_time_ms' seems kind of weird :(. We
introduced 'init_time_ms' just because the calculate_dirtyrate_dirty_ring
function will call this function; see the following block:
static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
{
- CPUState *cpu;
- int64_t msec = 0;
int64_t start_time;
+ int64_t duration;
uint64_t dirtyrate = 0;
uint64_t dirtyrate_sum = 0;
- DirtyPageRecord *dirty_pages;
- int nvcpu = 0;
int i = 0;
- CPU_FOREACH(cpu) {
- nvcpu++;
- }
-
- dirty_pages = malloc(sizeof(*dirty_pages) * nvcpu);
-
- DirtyStat.dirty_ring.nvcpu = nvcpu;
- DirtyStat.dirty_ring.rates = malloc(sizeof(DirtyRateVcpu) * nvcpu);
-
- dirtyrate_global_dirty_log_start();
-
- CPU_FOREACH(cpu) {
- record_dirtypages(dirty_pages, cpu, true);
- }
+ /* start log sync */
+ global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
DirtyStat.start_time = start_time / 1000;
The reason we introduced 'init_time_ms' is that we want to store the
start_time info and display it.
Dropping this parameter is fine from my point of view if we ignore the
duration error resulting from the delay between the caller and callee of
'vcpu_calculate_dirtyrate'.
- msec = config.sample_period_seconds * 1000;
- msec = set_sample_page_period(msec, start_time);
- DirtyStat.calc_time = msec / 1000;
+ /* calculate vcpu dirtyrate */
+ duration = vcpu_calculate_dirtyrate(config.sample_period_seconds *
1000,
+ start_time,
+ &DirtyStat.dirty_ring,
+ GLOBAL_DIRTY_DIRTY_RATE,
+ true);
- dirtyrate_global_dirty_log_stop();
-
- CPU_FOREACH(cpu) {
- record_dirtypages(dirty_pages, cpu, false);
- }
+ DirtyStat.calc_time = duration / 1000;
--
Best regards
Hyman Huang(黄勇)
On Mon, Feb 14, 2022 at 04:40:31PM +0800, Hyman Huang wrote:
> Dropping this parameter is fine from my point view if we ignore the duration
> error result from the delay between caller and callee of
> 'vcpu_calculate_dirtyrate’
Agreed, then let's drop it.
--
Peter Xu
© 2016 - 2026 Red Hat, Inc.